"""Feed a small document to MSHTML through COM and print what it parsed.

Prints the document title, the inner text of the body, the URL associated
with the document, and the name/content pair of every <meta> element.
Requires the pywin32 package (``win32com``).
"""
from win32com.client import Dispatch

# 'htmlfile' is a disguise for MSHTML as a COM server.
html = Dispatch('htmlfile')

# NOTE(review): the text written below contains no markup at all, so the
# title and the meta listing are presumably empty at runtime. It looks like
# the original tags were stripped from this file -- confirm against the
# upstream example and restore them if so.
html.writeln("""
A title
This is some of it. And this is the rest.""")

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print("Title: %s" % (html.title,))
print("Bag of words from body of the page: %s" % (html.body.innerText,))
print("URL associated with the page: %s" % (html.url,))
print("Display of name:content pairs from meta tags: ")

metas = html.getElementsByTagName("meta")
# Walk the collection by index using its own `length` property, exactly as
# the original did, rather than relying on the COM object being iterable.
for index in range(metas.length):
    meta = metas[index]
    print("\t%s: %s" % (meta.name, meta.content))