1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147 | #!/usr/bin/env python
from twisted.web import client, error
import os.path
import ConfigParser
import getpass, base64
import webbrowser
class HTTPProgressDownloader(client.HTTPDownloader):
    """HTTPDownloader that prints progress and records cache validators.

    While a 200 response is being received, a progress line is printed for
    every chunk of data.  The response's ``ETag`` and ``Last-Modified``
    values are written to the ``[headers]`` section of ``metadata.ini`` in
    the current working directory.
    """

    def __init__(self, url, outfile, headers=None):
        """Create the downloader.

        url     -- URL to fetch.
        outfile -- where the page data is written (passed to HTTPDownloader).
        headers -- optional dict of extra request headers.
        """
        client.HTTPDownloader.__init__(self, url, outfile, headers=headers)
        self.status = None
        # Progress counters; gotHeaders() resets them for a 200 response.
        # Defined here so pagePart() never meets a missing attribute.
        self.totallength = 0
        self.currentlength = 0.0

    def noPage(self, reason):  # called for non-200 responses
        """Handle a non-200 response.

        For a 304 the error message is printed and an empty page is handed
        to HTTPDownloader.page() -- presumably so the deferred fires as a
        success and the existing file counts as up to date (TODO confirm).
        Every other status is passed on to HTTPDownloader.noPage().
        """
        if self.status == '304':
            print(reason.getErrorMessage())
            client.HTTPDownloader.page(self, '')
        else:
            client.HTTPDownloader.noPage(self, reason)

    def gotHeaders(self, headers):
        """Prepare the progress display and store validators for a 200.

        headers -- mapping of lower-case header name to a list of values
                   (the code reads element [0] of each entry).
        """
        # page data is on the way
        if self.status == '200':
            # initialize for progress bar
            if 'content-length' in headers:
                self.totallength = int(headers['content-length'][0])
            else:
                # Unknown size: pagePart() reports kilobytes instead of %.
                self.totallength = 0
            self.currentlength = 0.0
            # Blank line that the first progress update will overwrite.
            print('')
            # NOTE(review): the flattened source does not show whether the
            # metadata update was nested under this branch.  Keeping it here
            # means a non-200 response cannot wipe the stored validators.
            self._saveValidators(headers)
        return client.HTTPDownloader.gotHeaders(self, headers)

    def _saveValidators(self, headers):
        """Rewrite the [headers] section of metadata.ini from *headers*."""
        validators = {}
        for name in ('etag', 'last-modified'):
            values = headers.get(name, '')
            if values:
                validators[name] = values[0]
        config = ConfigParser.ConfigParser()
        config.read('metadata.ini')
        # Start from an empty section so stale validators never survive.
        if config.has_section('headers'):
            config.remove_section('headers')
        config.add_section('headers')
        for key, value in validators.items():
            config.set('headers', key, value)
        # Context manager so the file is flushed and closed deterministically.
        with open('metadata.ini', 'w') as metadata_file:
            config.write(metadata_file)

    def pagePart(self, data):
        """Print a progress update for *data*, then pass it on."""
        if self.status == '200':
            self.currentlength += len(data)
            if self.totallength:
                # currentlength is a float, so this is true division.
                percent = "%i%%" % (
                    (self.currentlength / self.totallength) * 100)
            else:
                # Was self.currentLength: AttributeError when the server
                # sent no Content-Length.
                percent = '%dK' % (self.currentlength / 1000)
            # ESC[1F moves the cursor to the start of the previous line, so
            # each update overwrites the last one.
            print("\033[1FProgress: " + percent)
        return client.HTTPDownloader.pagePart(self, data)
def downloadWithProgress(url, outputfile, contextFactory=None, *args, **kwargs):
    """Start downloading *url* into *outputfile* with progress output.

    url            -- URL to fetch; its scheme selects TCP or SSL.
    outputfile     -- destination passed to HTTPProgressDownloader.
    contextFactory -- SSL context factory for https URLs; a default
                      ssl.ClientContextFactory() is created when omitted.
    *args/**kwargs -- forwarded to HTTPProgressDownloader (e.g. headers=...).

    Returns the downloader factory's deferred.
    """
    # Imported locally: the module-level name ``reactor`` is only bound when
    # the file runs as a script, so relying on it breaks library use.
    from twisted.internet import reactor

    # NOTE: client._parse is a private Twisted helper; the code unpacks it
    # as (scheme, host, port, path).
    scheme, host, port, _path = client._parse(url)
    factory = HTTPProgressDownloader(url, outputfile, *args, **kwargs)
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory.deferred
def downloadPage(url, outputfile, RequestHeaders):
    """Download *url* to *outputfile*, prompting for credentials on a 401.

    url            -- URL to fetch.
    outputfile     -- destination file name.
    RequestHeaders -- dict of request headers; handleBasicAuthentication
                      adds an Authorization entry to it on a 401.

    Returns the deferred for the whole chain (previously nothing was
    returned; callers that ignore the result are unaffected).
    """
    d = downloadWithProgress(url, outputfile, headers=RequestHeaders)
    # The authentication errback has to come BEFORE the completion callback.
    # In the original order a successful retry after a 401 had no callback
    # left to run, so reactor.stop() was never called and the script hung.
    d.addErrback(handleBasicAuthentication, url, outputfile, RequestHeaders)
    d.addCallback(downloadComplete)
    # Final catch-all: anything that still failed is reported here.
    d.addErrback(handleError)
    return d
def downloadComplete(result):
    """Success callback: report completion and stop the reactor.

    result -- the value the download deferred fired with (unused).
    """
    # Imported locally: the module-level name ``reactor`` is only bound when
    # the file runs as a script.
    from twisted.internet import reactor

    print("download Complete")
    reactor.stop()
def handleBasicAuthentication(failure, url, outputfile, RequestHeaders):
    """Errback: on HTTP 401, prompt for credentials and retry the download.

    failure        -- the failure from the download; anything other than
                      twisted.web.error.Error is re-raised by trap().
    url            -- URL to retry.
    outputfile     -- destination file name.
    RequestHeaders -- request header dict; an ``Authorization`` entry is
                      added to it in place before the retry.

    Returns the retry's deferred for a 401, otherwise the original failure
    so that later errbacks can handle it.
    """
    failure.trap(error.Error)
    if failure.value.status != '401':
        return failure

    username = raw_input("user name:")
    password = getpass.getpass("password: ")
    credentials = "%s:%s" % (username, password)
    # b64encode never inserts line breaks.  encodestring wraps its output
    # every 76 characters, which puts newlines inside the header value for
    # long credentials.
    RequestHeaders["Authorization"] = "Basic " + base64.b64encode(credentials)
    return downloadWithProgress(url, outputfile, headers=RequestHeaders)
def handleError(failure):
    """Final errback: print the failure's message and stop the reactor.

    failure -- the failure that reached the end of the errback chain.
    """
    # Imported locally: the module-level name ``reactor`` is only bound when
    # the file runs as a script.
    from twisted.internet import reactor

    # Matches the original two-item print statement exactly: the "Error: "
    # literal, the separating space, then the message.
    print("%s %s" % ("Error: ", failure.getErrorMessage()))
    reactor.stop()
def getRequestHeaders(url, outputfile):
    """Build conditional-GET request headers from ``metadata.ini``.

    url        -- URL about to be downloaded.
    outputfile -- destination file name.

    If ``metadata.ini`` does not exist in the current directory it is
    created with a ``[download-metadata]`` section holding *url* and
    *outputfile*, and an empty dict is returned.  Otherwise the ``etag`` and
    ``last-modified`` options of its ``[headers]`` section, when present and
    non-empty, are returned as ``If-None-Match`` / ``If-Modified-Since``.

    NOTE(review): an existing metadata.ini is not checked against *url*, so
    validators saved for a different download would be reused -- confirm
    this is intended.
    """
    # update metadata and generate request headers
    RequestHeaders = {}
    config = ConfigParser.ConfigParser()
    if not os.path.isfile('metadata.ini'):
        section = 'download-metadata'
        config.add_section(section)
        config.set(section, "url", url)
        config.set(section, "filename", outputfile)
        # Context manager so the file is flushed and closed deterministically.
        with open('metadata.ini', 'w') as metadata_file:
            config.write(metadata_file)
        return RequestHeaders

    config.read('metadata.ini')
    # Stored validator -> conditional request header it feeds.
    conditional_headers = (
        ('etag', 'If-None-Match'),
        ('last-modified', 'If-Modified-Since'),
    )
    for option, header_name in conditional_headers:
        if config.has_option('headers', option):
            value = config.get('headers', option)
            if value:
                RequestHeaders[header_name] = value
    return RequestHeaders
if __name__ == '__main__':
    # Script entry point: download URL to OUTPUTFILE, then open the result.
    import sys
    # Kept at module scope: functions in this file refer to the global
    # name ``reactor``.
    from twisted.internet import reactor

    # Fail with a usage message instead of a bare ValueError from tuple
    # unpacking when the argument count is wrong.
    if len(sys.argv) != 3:
        sys.exit("usage: %s URL OUTPUTFILE" % sys.argv[0])
    url, outputfile = sys.argv[1:]

    RequestHeaders = getRequestHeaders(url, outputfile)
    downloadPage(url, outputfile, RequestHeaders)
    # Blocks until a callback calls reactor.stop().
    reactor.run()
    # NOTE(review): webbrowser.open() is documented for URLs; passing a
    # plain file name is platform dependent (the page comments report that
    # this line had to be changed on OS X).
    webbrowser.open(outputfile)
|
Comments
Line 65: currentLength should be currentlength -- all lower case
On OS X, I had to change the last line (to open the browser) to:
Sign in to comment