Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python

import urllib
import urlparse
import os
import re
import logging

# Bound ``sub`` of a pattern matching every character that is NOT a hyphen,
# slash, ASCII letter, digit or dot -- i.e. everything that gets escaped when
# a URL path is turned into a local file name.
_sub = re.compile("([^\\-/a-zA-Z0-9\\.])").sub
# Bound ``sub`` of a pattern matching single dots; used only to escape whole
# ".." path segments so they cannot act as parent-directory references.
_dot_sub = re.compile("(\\.)").sub


def _sc(ch):
    """Return the ``_xx`` escape for the character captured by match *ch*.

    ``xx`` is the character's ordinal as (at least) two lowercase hex digits.
    """
    return "_%02x" % ord(ch.group(1))


def normalize_path(path, sub=_sub, sc=_sc):
    """Turn a URL path (optionally with ``?query``) into a relative file path.

    Rules applied, in order:

    * All leading slashes are dropped, so the result is never absolute.
    * A path that is empty or ends with ``/`` gets ``index.html`` appended.
      A path without any ``.`` is treated as a directory and gets
      ``/index.html`` appended.
    * Every character matched by *sub* is replaced by ``sc(match)``.
    * Path segments that are exactly ``..`` are escaped as well, so the
      result cannot climb out of the directory it is resolved against.

    path -- the URL path to convert.
    sub  -- callable ``sub(repl, string)`` performing the escaping pass;
            defaults to the module-level compiled pattern.
    sc   -- replacement callable receiving a match object whose group 1 is
            the single character to escape.

    Returns the normalized relative path as a string.
    """
    # Strip every leading slash first: stripping only one (after appending)
    # used to turn "/" into the absolute path "/index.html".
    path = path.lstrip("/")
    if path and "." not in path and not path.endswith("/"):
        path += "/"
    if not path or path.endswith("/"):
        path += "index.html"
    path = sub(sc, path)
    # "." and "/" survive the escaping pass, so ".." segments must be
    # neutralised explicitly.  "_" itself is always escaped by the default
    # pattern, so the escaped form cannot collide with a genuine path.
    segments = path.split("/")
    for index, segment in enumerate(segments):
        if segment == "..":
            segments[index] = _dot_sub(sc, segment)
    return "/".join(segments)

def localize_path(url):
    """Return the path of *url*, followed by ``?query`` when a query exists.

    Scheme, network location and fragment are discarded.
    """
    _scheme, _netloc, path, query, _fragment = urlparse.urlsplit(url)
    if not query:
        return path
    return "%s?%s" % (path, query)

def exists_relative(url):
    """Tell whether the local file path derived from *url* already exists.

    The path is obtained by passing *url* through ``localize_path`` and then
    ``normalize_path``; it is checked with ``os.path.exists``.
    """
    return os.path.exists(normalize_path(localize_path(url)))

def fetch_relative(url, proxies=None, postfetch=None):
    """Download *url* to its local file path unless that file already exists.

    The local path is ``normalize_path(localize_path(url))``.

    url       -- the URL to fetch.
    proxies   -- passed through unchanged to ``urllib.urlopen``.
    postfetch -- optional callback that receives the data (as a string),
                 whether it was just downloaded or read back from the
                 existing local file.

    Returns True when the URL was downloaded and stored, False when the
    local file already existed (in which case nothing is downloaded).
    """
    path = normalize_path(localize_path(url))

    if os.path.exists(path):
        # Already stored locally: skip the download, but still hand the
        # stored bytes to the callback if there is one.
        if postfetch:
            logging.debug("reprocessing file %s", path)
            f = open(path, "rb")
            try:
                data = f.read()
            finally:
                f.close()
            postfetch(data)
        return False

    logging.debug("fetching %s", url)
    f = urllib.urlopen(url, proxies=proxies)
    try:
        data = f.read()
    finally:
        # Close the response even when reading fails part-way.
        f.close()

    head = os.path.dirname(path)
    # head is "" when the file lives directly in the current directory;
    # os.makedirs("") raises OSError, so only create real directories.
    if head and not os.path.exists(head):
        os.makedirs(head)
    f = open(path, "wb")
    try:
        f.write(data)
    finally:
        f.close()

    if postfetch:
        postfetch(data)
    return True

History