Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/python
"""
Log resources requested by a webpage using WebKit.

Originally designed to download video files requested by Adobe Flash videos.
"""   
import sys
import re

# Third-party modules
import webkit
import gtk

# Supported sites.
# key: regexp searched for in the page URL given on the command line
# value: regexp searched for in each requested resource URL to spot the video
# Raw strings keep the regexp backslashes literal (no invalid "\." escapes).
SITES = {
    r"youtube\.com/": r"youtube\.com/videoplayback",
    r"blip\.tv/": r"blip\.tv/file/get/",
}

def debug(line):
    """Emit `line` on standard error, prefixed with "--- " and a newline appended."""
    text = "--- %s\n" % line
    stream = sys.stderr
    stream.write(text)

def first(it):
    """Return the next item produced by iterator `it`, or None if it is exhausted."""
    try:
        head = next(it)
    except StopIteration:
        # Nothing left to yield: report "empty" as None.
        return None
    return head

def on_request(view, frame, resource, request, response,
               resource_regexp, skip_regexp=None):
    """Inspect a resource request and report it if it looks like the video file.

    Used in this file as the handler for the webview's
    "resource-request-starting" signal; `view`, `frame`, `resource` and
    `response` are accepted to match that handler signature but are not used.

    Arguments:
        request: object exposing get_uri(), set_uri() and a "message" property.
        resource_regexp: regexp (string) searched for in the request URL; on a
            match the URL is printed to standard output and the GTK main loop
            is asked to quit. A false value disables matching.
        skip_regexp: optional compiled regexp; requests whose URL matches are
            redirected to "about:blank" instead of being loaded.

    Returns None in every case.
    """
    url = request.get_uri()
    message = request.get_property("message")
    if not message:
        # No message attached to this request: nothing to log or match.
        return
    method = message.get_property("method")
    if skip_regexp and skip_regexp.search(url):
        # cancel the request
        request.set_uri("about:blank")
        return
    debug("request: %s %s" % (method, url))
    if resource_regexp and re.search(resource_regexp, url):
        debug("videofile match: %s" % url)
        # Function-call form of print: same output on Python 2, and valid
        # syntax on Python 3 (the bare "print url" statement is not).
        print(url)
        gtk.main_quit()

def create_webview():
    """Build a gtk.Window that hosts a WebKit webview inside a scrolled area.

    Returns a (window, view) tuple. The window is not shown here; callers
    decide whether to display it.
    """
    webview = webkit.WebView()
    # Wrap the view in a scrolled container, then place that in the window.
    container = gtk.ScrolledWindow()
    container.add(webview)
    toplevel = gtk.Window()
    toplevel.add(container)
    return toplevel, webview
 
def main(args):
    """Parse the command line, load the URL in a webview and run the GTK loop.

    Arguments:
        args: list of command-line arguments (without the program name);
            exactly one positional argument, the page URL, is expected.

    Returns 1 when no entry in SITES matches the URL and test mode is off;
    otherwise returns None once the GTK main loop has finished. A wrong number
    of positional arguments is reported through the option parser, which
    prints the usage message and exits with status 2.
    """
    import optparse
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty description instead of crashing on None.strip().
    usage = """usage: %%prog [Options] URL\n\n%s""" % (__doc__ or "").strip()
    parser = optparse.OptionParser(usage)
    parser.add_option('-t', '--test', dest='test', action="store_true",
                      default=False, help="Run in test mode (show webview)")
    options, args0 = parser.parse_args(args)
    if len(args0) != 1:
        # Report a usage error rather than a ValueError traceback.
        parser.error("expected exactly one URL argument, got %d" % len(args0))
    url = args0[0]
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    resource_regexp = first(pattern for (urlre, pattern) in SITES.items()
                            if re.search(urlre, url))
    if not resource_regexp and not options.test:
        debug("No module found for URL: %s" % url)
        return 1
    window, webview = create_webview()
    # Images and stylesheets are never the video file, so skip loading them.
    skip_regexp = re.compile(r"\.(jpg|png|gif|css)(\?|$)", re.I)
    webview.connect("resource-request-starting", on_request, resource_regexp, skip_regexp)
    webview.load_uri(url)
    if options.test:
        # Test mode: make the browser window visible.
        window.resize(640, 480)
        window.show_all()
    gtk.main()

# Script entry point: run main() on the command-line arguments (program name
# excluded) and use its return value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

History