import shutil
import sys
import urllib2
import urlparse

import lxml.html

HEADERS = {
    'User-Agent': 'urllib2 (Python %s)' % sys.version.split()[0],
    'Connection': 'close',
}

# Number of bytes of the landing page downloaded when searching for an icon
# <link> tag. 2048 bytes should be enough for most websites.
_PAGE_PREFIX_BYTES = 2048

# Errors raised by urllib2 that are treated as "resource not available".
_FETCH_ERRORS = (urllib2.HTTPError, urllib2.URLError)


def _fetch(url, limit=None):
    """Return the response body of *url* requested with HEADERS.

    When *limit* is given, at most that many bytes are read. The response
    is always closed, even if reading fails. urllib2 errors propagate to
    the caller.
    """
    response = urllib2.urlopen(urllib2.Request(url, headers=HEADERS))
    try:
        if limit is None:
            return response.read()
        return response.read(limit)
    finally:
        response.close()


def _find_icon_href(markup):
    """Return the href of the first icon <link> in *markup*, or None."""
    if not markup.strip():
        # lxml refuses to parse an empty document.
        return None
    try:
        document = lxml.html.fromstring(markup)
    except lxml.etree.ParserError:
        return None
    hrefs = document.xpath(
        '//link[@rel="icon" or @rel="shortcut icon"]/@href'
    )
    if hrefs:
        return hrefs[0]
    return None


def _download_icon(url):
    """Return the favicon bytes for the site at *url*, or None.

    *url* must end with '/'. The conventional ``favicon.ico`` location is
    tried first; if that request fails, the start of the page at *url* is
    scanned for an icon <link> and its target is downloaded instead.
    """
    try:
        return _fetch(url + 'favicon.ico')
    except _FETCH_ERRORS:
        pass

    try:
        page = _fetch(url, _PAGE_PREFIX_BYTES)
    except _FETCH_ERRORS:
        return None

    href = _find_icon_href(page)
    if not href:
        return None

    try:
        # urljoin copes with relative, root-relative and absolute hrefs.
        return _fetch(urlparse.urljoin(url, href))
    except _FETCH_ERRORS:
        return None


def get_favicon(url, path='favicon.ico', alt_icon_path='alticon.ico'):
    """Save the favicon of the site at *url* to *path*.

    The icon is looked up at ``<url>/favicon.ico`` first and, if that
    request fails, via an icon <link> tag found in the first bytes of the
    page at *url*. If no icon can be downloaded, the file at
    *alt_icon_path* is copied to *path* instead.

    Args:
        url: Base URL of the website; a trailing '/' is added if missing.
        path: Destination file for the icon.
        alt_icon_path: Existing file copied to *path* as a fallback.

    Returns:
        None.

    Raises:
        IOError: If *path* cannot be written, or if the fallback copy
            from *alt_icon_path* fails.
    """
    if not url.endswith('/'):
        url += '/'

    icon = _download_icon(url)
    if icon is None:
        shutil.copyfile(alt_icon_path, path)
        return

    with open(path, 'wb') as icon_file:
        icon_file.write(icon)


if __name__ == '__main__':
    get_favicon('http://code.activestate.com', 'favicon.ico')