from httplib import HTTPConnection, _CS_IDLE
import urlparse
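# HTTP/1.1 pipelining with Python 2's httplib: several GET requests are written
# to one keep-alive connection before any response is read, by resetting the
# connection's private state to _CS_IDLE between request() calls and pairing
# each queued request with a manually constructed response object.
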
def pipeline(domain, pages, max_out_bound=4, debuglevel=0):
    """Fetch several pages from one host over a single pipelined HTTP/1.1 connection.

    Returns a list parallel to `pages`: the response body string on success, or a
    (status, reason[, location]) tuple for off-host redirects and for errors.
    """
    pagecount = len(pages)
    pages = list(pages)  # copy, so redirect rewrites below don't mutate the caller's sequence
    conn = HTTPConnection(domain)
    conn.set_debuglevel(debuglevel)
    respobjs = [None] * pagecount
    finished = [False] * pagecount
    data = [None] * pagecount
    headers = {'Host': domain, 'Content-Length': 0, 'Connection': 'Keep-Alive'}
    while not all(finished):
        # Send phase: queue up to max_out_bound requests before reading any response
        out_bound = 0
        for i, page in enumerate(pages):
            if out_bound >= max_out_bound:
                break
            elif page and not finished[i] and respobjs[i] is None:
                if debuglevel > 0:
                    print 'Sending request for %r...' % (page,)
                # Hack: reset httplib's private connection state so request()
                # will send another request before the previous response is read
                conn._HTTPConnection__state = _CS_IDLE
                conn.request("GET", page, None, headers)
                respobjs[i] = conn.response_class(conn.sock, strict=conn.strict, method=conn._method)
                out_bound += 1
        # Try to read a response
        for i, resp in enumerate(respobjs):
            if resp is None:
                continue
            if debuglevel > 0:
                print 'Retrieving %r...' % (pages[i],)
            out_bound -= 1
            skip_read = False
            resp.begin()
            if debuglevel > 0:
                print ' %d %s' % (resp.status, resp.reason)
            if 200 <= resp.status < 300:
                # Ok
                data[i] = resp.read()
                cookie = resp.getheader('Set-Cookie')
                if cookie is not None:
                    headers['Cookie'] = cookie
                skip_read = True
                finished[i] = True
                respobjs[i] = None
            elif 300 <= resp.status < 400:
                # Redirect
                loc = resp.getheader('Location')
                respobjs[i] = None
                parsed = loc and urlparse.urlparse(loc)
                if not parsed:
                    # Missing or empty Location header
                    data[i] = (resp.status, resp.reason)
                    finished[i] = True
                elif parsed.netloc != '' and parsed.netloc != domain:
                    # Redirect to another host
                    data[i] = (resp.status, resp.reason, loc)
                    finished[i] = True
                else:
                    # Same-host redirect: rewrite the path and request it on a later pass
                    path = urlparse.urlunparse(parsed._replace(scheme='', netloc='', fragment=''))
                    if debuglevel > 0:
                        print ' Updated %r to %r' % (pages[i], path)
                    pages[i] = path
            elif resp.status >= 400:
                # Failed
                data[i] = (resp.status, resp.reason)
                finished[i] = True
                respobjs[i] = None
            if resp.will_close:
                # Connection (will be) closed; discard queued requests so they get resent
                conn.close()
                if debuglevel > 0:
                    print ' Connection closed'
                for j, f in enumerate(finished):
                    if not f and respobjs[j] is not None:
                        if debuglevel > 0:
                            print ' Discarding out-bound request for %r' % (pages[j],)
                        respobjs[j] = None
                break
            elif not skip_read:
                resp.read()  # read and discard any remaining body data
            if any(not f and respobjs[j] is None for j, f in enumerate(finished)):
                # Send another pending request
                break
        else:
            break  # All respobjs are None?
    return data
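
# A small, optional helper (not part of the original function) showing how a
# caller might tell successful bodies apart from the (status, reason[, location])
# tuples that pipeline() stores for off-host redirects and for errors.
def summarize(pages, data):
    for page, result in zip(pages, data):
        if isinstance(result, tuple):
            # Redirect away from the host or an HTTP error
            print '%s -> HTTP %s %s' % (page, result[0], result[1])
        else:
            print '%s -> %d bytes' % (page, len(result))
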
if __name__ == '__main__':
    domain = 'en.wikipedia.org'
    pages = ('/wiki/HTTP_pipelining', '/wiki/HTTP', '/wiki/HTTP_persistent_connection')
    data = pipeline(domain, pages, max_out_bound=2, debuglevel=1)
    for i, page in enumerate(data):
        print
        print '==== Page %r ====' % (pages[i],)
        print page[:512]