Give this program the URL of one picture and it will try to download the whole series of existing pictures. Example: downloader.py http://www.example.com/picture01.jpg
| Python |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | """
By bussiere bussiere @at gmail.com
"""
# Recipe metadata, stored as plain module-level strings.
# NOTE(review): none of these names is read back anywhere in the visible code.
__Author__ ="bussiere"
__Email__ = "bussiere @at gmail.com"
# "Titre" is French for "title".
__Titre__ = "downloading a serial list of picture from the web"
# Usage summary: the script is given the URL of one picture of a numbered series.
__Description__ = "give this program an url from a pciture and it will try to download all the list of existing picture exemple : downloader.py http://www.example.com/picture01.jpg"
# Motivation for the script.
__Discussion__ = "some times i've got a link like http://www.example.com/picture01.jpg and i guess that there are more picture of it this program will try to find them all and download them all"
# Space-separated search keywords.
__Tags__ ="download picture pictures jpg JPG url list series serie"
import re
import time,datetime
import urllib2
def _number_spellings(number, width):
    """Return the spellings of *number* to try, most zero-padded first.

    For width 3 and number 5 this gives ["005", "05", "5"]; duplicates that
    appear once the number is wider than the padding are dropped.
    """
    spellings = []
    for pad in range(width, 0, -1):
        text = str(number).zfill(pad)
        if text not in spellings:
            spellings.append(text)
    return spellings


def _fetch(target):
    """Return the bytes served at *target*, or None when it cannot be fetched."""
    # urllib2 is the Python 2 name; fall back to urllib.request where it is
    # absent so the function does not depend on a single interpreter version.
    try:
        from urllib2 import urlopen
    except ImportError:
        from urllib.request import urlopen
    try:
        remote = urlopen(target)
        try:
            return remote.read()
        finally:
            # always release the connection, even when read() fails
            remote.close()
    except Exception:
        # best effort: any fetch error just means "this URL is not available".
        # KeyboardInterrupt/SystemExit are deliberately NOT caught here.
        return None


def download_pictures(url,start=1,limit=999,trial=3):
    """Download a numbered series of pictures guessed from one sample URL.

    The last run of digits that is followed by a dot in the final path
    segment of *url* (the "01" of http://www.example.com/picture01.jpg) is
    replaced by every number from *start* up to, but not including, *limit*.
    Each number is requested zero-padded to the width seen in *url* first,
    then with fewer zeros, so both "picture01.jpg" and "picture1.jpg"
    numbering schemes are found.

    url   -- sample URL containing the picture number
    start -- first number to request
    limit -- exclusive upper bound of the numbers to request
    trial -- number of missing pictures tolerated; the loop stops as soon as
             the total count of numbers that could not be fetched exceeds it

    The loop also stops at the first response whose body contains "HTML",
    since servers sometimes answer a missing picture with an error page.

    Each picture is written to the current directory as
    "<epoch seconds>_<number as requested><extension>"; the number keeps the
    names unique even when several pictures arrive within the same second.

    Returns the list of file names written, in download order.
    Raises ValueError when *url* contains no number followed by a dot.
    Errors while writing a local file are propagated to the caller.
    """
    # accept numeric strings too (e.g. values taken straight from sys.argv)
    start, limit, trial = int(start), int(limit), int(trial)

    # digits directly followed by a dot, with no "/" after them, i.e. the
    # number in the file name rather than one in the host or a directory
    found = re.search(r"([0-9]+)\.[^/]*$", url)
    if found is None:
        raise ValueError("no picture number found in url: %r" % (url,))
    begin, end = found.span(1)
    width = end - begin
    prefix = url[:begin]
    # everything after the number, leading dot included (".jpg")
    extension = url[end:]

    saved = []
    failures = 0
    number = start
    while number < limit:
        data = None
        for spelling in _number_spellings(number, width):
            data = _fetch(prefix + spelling + extension)
            if data is not None:
                break
        if data is None:
            # none of the spellings of this number could be downloaded
            failures += 1
            if failures > trial:
                break
        elif b"HTML" in data:
            # an error page instead of a picture: assume the series is over
            break
        else:
            name = "%d_%s%s" % (time.time(), spelling, extension)
            with open(name, "wb") as local_file:
                local_file.write(data)
            saved.append(name)
        number += 1
    return saved
def main(argv=None):
    """Run the downloader from a command line.

    argv -- full argument vector with the program name first; defaults to
            sys.argv. Layout: URL [START [LIMIT [TRIAL]]], where START is
            the first picture number (default 1), LIMIT the exclusive upper
            bound (default 999 + START) and TRIAL the number of failures
            tolerated (default 3).

    Returns 0 once the download loop has run, or 2 after printing a usage
    line when the URL is missing or a numeric argument is not an integer.
    """
    # imported here so main() also works when this module is imported rather
    # than executed as a script
    import sys
    if argv is None:
        argv = sys.argv
    options = list(argv[1:])
    usage = "usage: downloader.py URL [START [LIMIT [TRIAL]]]\n"
    if not options:
        sys.stderr.write(usage)
        return 2
    url = options[0]
    # command-line values are strings; the downloader does arithmetic on them
    try:
        numbers = [int(text) for text in options[1:4]]
    except ValueError:
        sys.stderr.write(usage)
        return 2
    # options that were not given fall back to their default value
    start = numbers[0] if len(numbers) > 0 else 1
    limit = numbers[1] if len(numbers) > 1 else 999 + start
    trial = numbers[2] if len(numbers) > 2 else 3
    download_pictures(url, start, limit, trial)
    return 0
if __name__ == "__main__":
    # Script entry point. sys is imported at module scope here, so the name
    # is also visible to the functions defined above when run as a script.
    import sys
    # run the command-line driver and hand its result to the interpreter
    # as the process exit status
    exit_status = main()
    sys.exit(exit_status)
|
Discussion
Sometimes I have a link like http://www.example.com/picture01.jpg and I guess that there are more pictures in the same series; this program will try to find them all and download them all.


Sign in to comment