Welcome, guest | Sign In | My Account | Store | Cart

This small module builds a urllib2 opener that can be used to make a connection through a proxy using the HTTP CONNECT method (which can be used to proxy SSL connections). The current urllib2 does not seem to support this method.

Python, 92 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# urllib2 opener to connect through a proxy using the CONNECT method (useful for SSL)
# tested with python 2.4

import urllib2
import urllib
import httplib
import socket


class ProxyHTTPConnection(httplib.HTTPConnection):

    _ports = {'http' : 80, 'https' : 443}


    def request(self, method, url, body=None, headers={}):
        #request is called before connect, so can interpret url and get
        #real host/port to be used to make CONNECT request to proxy
        proto, rest = urllib.splittype(url)
        if proto is None:
            raise ValueError, "unknown URL type: %s" % url
        #get host
        host, rest = urllib.splithost(rest)
        #try to get port
        host, port = urllib.splitport(host)
        #if port is not defined try to get from proto
        if port is None:
            try:
                port = self._ports[proto]
            except KeyError:
                raise ValueError, "unknown protocol for: %s" % url
        self._real_host = host
        self._real_port = port
        httplib.HTTPConnection.request(self, method, url, body, headers)
        

    def connect(self):
        httplib.HTTPConnection.connect(self)
        #send proxy CONNECT request
        self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host, self._real_port))
        #expect a HTTP/1.0 200 Connection established
        response = self.response_class(self.sock, strict=self.strict, method=self._method)
        (version, code, message) = response._read_status()
        #probably here we can handle auth requests...
        if code != 200:
            #proxy returned and error, abort connection, and raise exception
            self.close()
            raise socket.error, "Proxy connection failed: %d %s" % (code, message.strip())
        #eat up header block from proxy....
        while True:
            #should not use directly fp probablu
            line = response.fp.readline()
            if line == '\r\n': break


class ProxyHTTPSConnection(ProxyHTTPConnection):
    """CONNECT-proxy connection that switches to SSL once the tunnel is up."""

    default_port = 443

    def __init__(self, host, port = None, key_file = None, cert_file = None, strict = None, **kwargs):
        """Store the optional client key/certificate and set up the connection.

        ``strict`` is forwarded to the base class.  Any extra keyword
        arguments (for example the ``timeout`` that urllib2 passes from
        Python 2.6 on) are forwarded as well, so the class works on
        interpreters whose HTTPConnection accepts them.
        """
        ProxyHTTPConnection.__init__(self, host, port, strict, **kwargs)
        self.key_file = key_file
        self.cert_file = cert_file

    def connect(self):
        """Open the proxy tunnel, then wrap the socket for SSL."""
        ProxyHTTPConnection.connect(self)
        # make the sock ssl-aware
        try:
            import ssl
        except ImportError:
            # Interpreter without the ssl module (Python < 2.6): use the
            # legacy socket.ssl / FakeSocket pair.
            ssl_obj = socket.ssl(self.sock, self.key_file, self.cert_file)
            self.sock = httplib.FakeSocket(self.sock, ssl_obj)
        else:
            self.sock = ssl.wrap_socket(self.sock, keyfile=self.key_file,
                                        certfile=self.cert_file)
        
                                       
class ConnectHTTPHandler(urllib2.HTTPHandler):

    def do_open(self, http_class, req):
        return urllib2.HTTPHandler.do_open(self, ProxyHTTPConnection, req)


class ConnectHTTPSHandler(urllib2.HTTPSHandler):

    def do_open(self, http_class, req):
        return urllib2.HTTPSHandler.do_open(self, ProxyHTTPSConnection, req)


if __name__ == '__main__':
    
    import sys
    
    opener = urllib2.build_opener(ConnectHTTPHandler, ConnectHTTPSHandler)
    urllib2.install_opener(opener)
    req = urllib2.Request(url='https://192.168.1.1')
    req.set_proxy('192.168.1.254:3128', 'https')
    f = urllib2.urlopen(req)
    print f.read()

This module provides handlers for a urllib2 opener that can make a connection through a proxy that supports the CONNECT method. The ProxyHTTPConnection class takes care of connecting to the proxy, sending the CONNECT string and interpreting the result. On success (an HTTP 200 reply) we are connected to the remote host and everything can proceed as usual. When connecting to an SSL-enabled host, ProxyHTTPSConnection makes the socket SSL-aware after a successful connect. Currently no authentication scheme is supported, but one can easily be added (maybe even by resorting to urllib2's own support).

15 comments

Norm Petterson 18 years ago  # | flag

Modification to use with https redirection (required for libgmail).

Thanks for the nice tool. However, https redirection is not handled,
since urllib2 doesn't let you do set_proxy on the redirection
requests. I modified the above opener accordingly:

class ConnectHTTPHandler(urllib2.HTTPHandler):

    def __init__(self, proxy=None, debuglevel=0):
        self.proxy = proxy
        urllib2.HTTPHandler.__init__(self, debuglevel)

    def do_open(self, http_class, req):
        if self.proxy is not None:
            req.set_proxy(self.proxy, 'http')
        return urllib2.HTTPHandler.do_open(self, ProxyHTTPConnection, req)

class ConnectHTTPSHandler(urllib2.HTTPSHandler):

    def __init__(self, proxy=None, debuglevel=0):
        self.proxy = proxy
        urllib2.HTTPSHandler.__init__(self, debuglevel)

    def do_open(self, http_class, req):
        if self.proxy is not None:
            req.set_proxy(self.proxy, 'https')
        return urllib2.HTTPSHandler.do_open(self, ProxyHTTPSConnection, req)

Note that you specify your proxy with urllib2.build_opener, e.g.,

    p = '127.0.0.1:5865'
    opener = urllib2.build_opener(
        ConnectHTTPHandler(proxy=p), ConnectHTTPSHandler(proxy=p))

Also, of course, you no longer need to do req.set_proxy in your own
code.
yin sun 17 years, 10 months ago  # | flag

HTTP proxy with CONNECT support to tunnel HTTPS requests. To work with an "HTTP proxy with CONNECT support to tunnel HTTPS requests", a ProxyHTTPSConnection is needed inside ConnectHTTPHandler

This is the code change

class ConnectHTTPHandler(urllib2.HTTPHandler):
    """HTTP handler variant that opens connections with ProxyHTTPSConnection."""

    def do_open(self, http_class, req):
        # http_class is ignored: the HTTPS tunnelling connection class is
        # used even though this is the plain-HTTP handler.
        parent_open = urllib2.HTTPHandler.do_open
        return parent_open(self, ProxyHTTPSConnection, req)
Joey Joey 16 years, 5 months ago  # | flag

urllib2.HTTPSHandler error. i'm getting this error:

Traceback (most recent call last):
  File "test.py", line 77, in ?
    class ConnectHTTPSHandler(urllib2.HTTPSHandler):
AttributeError: 'module' object has no attribute 'HTTPSHandler'

I'm using Python 2.4

sourabh modi 15 years, 4 months ago  # | flag

i am behind a proxy which requires authentication how to use opener for that

Bryan Schmersal 15 years ago  # | flag

To do proxy authentication I edited connect as follows:

def connect(self):
    """Connect to the proxy and open a tunnel, sending Basic proxy credentials.

    Reads ``_real_host``/``_real_port`` and ``_proxyuser``/``_proxypass``
    from the instance; where the latter two are set is not shown here.
    Raises socket.error when the proxy answers with a status other than 200.
    """
    httplib.HTTPConnection.connect(self)
    #send proxy CONNECT request
    connmsg = 'CONNECT %s:%s HTTP/1.1\r\n' % (self._real_host, self._real_port)
    connmsg += 'Proxy-Connection: keep-alive\r\n'
    connmsg += 'Connection: keep-alive\r\n'
    connmsg += 'Host: %s\r\n' % self._real_host
    connmsg += 'User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en_us) AppleWebKit/525.27.1 (KHTML, like Gecko) Version/3.2.1 Safari/525.27.1\r\n'
    if self._proxyuser:
        connmsg += 'Proxy-Authorization: Basic %s\r\n' % base64.b64encode('%s:%s' % (self._proxyuser, self._proxypass))
    connmsg += '\r\n'
    # NOTE(review): this logs the Proxy-Authorization header, i.e. the
    # base64-encoded credentials, at debug level.
    logging.debug(connmsg)
    self.send(connmsg)
    #expect a HTTP/1.0 200 Connection established
    response = self.response_class(self.sock, strict=self.strict, method=self._method)
    (version, code, message) = response._read_status()
    #probably here we can handle auth requests...
    if code != 200:
        #proxy returned an error, abort connection, and raise exception
        self.close()
        raise socket.error("Proxy connection failed: %d %s" % (code, message.strip()))
    #eat up header block from proxy; an empty read means the proxy closed
    #the connection, so stop then too instead of looping forever
    while True:
        #should not use directly fp probably
        line = response.fp.readline()
        logging.debug(line)
        if not line or line in ('\r\n', '\n'):
            break

No matter what I tried, I couldn't get the auth handlers to do the right thing. I can't wait until I need to add SOCKS support....

Andy Newport 14 years, 11 months ago  # | flag

I have posted a working Python 3 version here. It was generated using the 2to3 syntax tool then all the bits that didn't work (ssl, http.client, utf-8 vs ascii issues) were fixed. It also shows how to add handlers to the chain for the proxy and basic authentication.

Cliffs notes version: Send the connect string (and read the response) in utf-8 format: self.send("CONNECT %s:%d HTTP/1.0\r\n\r\n" % (self._real_host, self._real_port)) becomes: connect_string="CONNECT {0}:{1} HTTP/1.0\r\n\r\n".format(self._real_host, self._real_port) self.send(connect_string.encode('utf-8'))

Wrap the socket for ssl using the ssl class (instead of a method in socket class): ssl = socket.ssl(self.sock, self.key_file, self.cert_file) self.sock = httplib.FakeSocket(self.sock, ssl) becomes: self.sock = ssl.wrap_socket(self.sock, self.key_file, self.cert_file)

I have tested this for get and post.

Rm4dillo 13 years, 4 months ago  # | flag

Hello,

For better compatibility with servers, the Request URI should not contain the location (scheme, fqdn, port, etc...) but only the path, therefore the following code:

httplib.HTTPConnection.request(self, method, url, body, headers)

must be replaced by:

path = urllib2.urlparse.urlparse(url).path
httplib.HTTPConnection.request(self, method, path, body, headers)

Regards, Rm4dillo

Scott Milliken 12 years, 11 months ago  # | flag

Take care with this loop:

while True:
            #should not use directly fp probablu
            line = response.fp.readline()
            if line == '\r\n':
                break

It is not uncommon that your proxy might return an empty response, which would cause this loop to never terminate.

boussard 12 years, 10 months ago  # | flag

You can use httpsproxy_urllib2 (available on PyPI) for Python 2.4 or Python 2.5 instead of this recipe. For me it works better.

# Usage sketch: proxy_config, url and data are not defined in this snippet
# and must be supplied by the surrounding code.
# The proxy configured under the 'http' key is registered for https URLs.
proxy = urllib2.ProxyHandler({'https': proxy_config['http']})
OPENER = urllib2.build_opener(proxy)
# make the opener the default used by urllib2.urlopen()
urllib2.install_opener(OPENER)
req = urllib2.Request(url = url, data = data)
resp = urllib2.urlopen(req)
Matt 12 years, 9 months ago  # | flag

@boussard

Works great! Thanks for sharing!

Philippe THIRION 10 years ago  # | flag

Modifications for python 2.6 compatibility:

I have tried to run this code using python 2.6. I have met some issues. Here is the solution I found.

1) Add timeout extra parameter in ProxyHTTPSConnection init :

#def __init__(self, host, port = None, key_file = None, cert_file = None, strict = None):
# modif for python 2.6
def __init__(self, host, port = None, key_file = None, cert_file = None, strict = None, timeout = 0):

2) Use ssl.wrap_socket package instead of httplib.FakeSocket:

import ssl

...

#ssl = socket.ssl(self.sock, self.key_file, self.cert_file)
#self.sock = httplib.FakeSocket(self.sock, ssl)
# modif for python 2.6
self.sock = ssl.wrap_socket(self.sock, keyfile=self.key_file, certfile=self.cert_file)
Jean-Marc Gillet 9 years, 11 months ago  # | flag

I learned a lot with Alessandro's initial code. However the request() part wasn't working for me. Here is my modest redesign that works perfectly for my Python 2.7 and Cntlm HTTP proxy combination. Authentication is not covered since Cntlm does it for me. My apologies if the coding style is not orthodox. See the next post for a typical use as I hit the 3K limit. Any code criticism will be more than welcome.

import os
import httplib
import urllib2
import socket
import ssl
from urlparse import urlsplit
# -------------------------------------------------------------------------
class https_conn_proxy(httplib.HTTPConnection):
 def set_tunnel(self,host,headers=None):   # complete override
  self._tunnel_hostport=host
  if ":" not in host:self._tunnel_hostport+=":443"
 def connect(self):
  httplib.HTTPConnection.connect(self)
  self.send("CONNECT "+self._tunnel_hostport+" HTTP/1.0\r\n\r\n")
  response=self.response_class(self.sock,
   strict=self.strict,method=self._method)
  (__,code,message)=response._read_status()
  if code!=200:
   self.close()
   raise socket.error,\
    "Tunnel connection via HTTP proxy failed - %u %s"%(code,message.strip())
  """Eat up the rest of the headers"""
  line="."
  while line and line!="\r\n":
   line=response.fp.readline(32768)
   if len(line)>=32768:raise LineTooLong("header line")

  self.sock=ssl.wrap_socket(self.sock)
  """Impossible to add key and certificate with wrap_socket()
  unless AbstractHTTPHandler.do_open() is rewritten
  to add that info when calling the custom class.
  """
# class https_conn_proxy()
# -------------------------------------------------------------------------
class https_handler_proxy(urllib2.HTTPSHandler):
 def __init__(self,proxy,
  key_file=None,cert_file=None,strict=None,debuglevel=0):
  s=urlsplit(proxy)
  if s.scheme!="http" or not s.netloc:
   raise ValueError,"Invalid HTTP proxy URL "+proxy
  self.proxy=s.netloc
  urllib2.HTTPSHandler.__init__(self,debuglevel)
 def do_open(self,http_class,req):
  if self.proxy:req.set_proxy(self.proxy,"http")
  return urllib2.HTTPSHandler.do_open(self,https_conn_proxy,req)
# class https_handler_proxy()
# -------------------------------------------------------------------------
def wget_init(proxy=None):
 if proxy:
  os.unsetenv("http_proxy")
  os.unsetenv("https_proxy")
  urllib2.install_opener(urllib2.build_opener(\
   urllib2.ProxyHandler({"http":proxy}),
   https_handler_proxy(proxy)))
def wget_page(url, t=10):
    """Open ``url`` with the installed opener; ``t`` is the timeout passed to urlopen."""
    request = urllib2.Request(url)
    return urllib2.urlopen(request, timeout=t)
Jean-Marc Gillet 9 years, 11 months ago  # | flag

Typical use :

import sys

wget_init("http://localhost:3128/")   # the Cntlm proxy
try:
    # keep the response: it is read below
    page = wget_page("https://docs.python.org/2/library/")
except urllib2.URLError:
    sys.stderr.write("Can't connect\n")
    sys.exit(8)
# UTF-8 most of the time; also used when the server declares no charset
charset = page.headers.getparam("charset") or "utf-8"
p = some_HTMLParser()
for line in page:
    p.feed(line.decode(charset))
    # p typically does a .encode("ascii","replace")
    # within .handle_data()
page.close()
del p, page
Jean-Marc Gillet 9 years, 11 months ago  # | flag

Let's also monitor http://bugs.python.org/issue7776 !

Jean-Marc Gillet 9 years, 10 months ago  # | flag

My code still runs fine with Python 2.7.7 that has the fix of issue 7776.