Welcome, guest | Sign In | My Account | Store | Cart
#coding:utf-8

import HTMLParser
html=HTMLParser.HTMLParser


class MyHtmlparser(html):
    """HTML parser that collects text fragments and start-tag attributes.

    After feeding a document:
      * ``lidata`` holds every text chunk passed to ``handle_data``, in
        the order the parser delivered them.
      * ``dic`` maps each start-tag name to the attribute list of the
        most recent occurrence of that tag (earlier ones are overwritten).
    """

    def __init__(self):
        # Explicit base-class call, as in the original.
        html.__init__(self)
        # Records each tag that was seen together with its attributes.
        self.dic = {}
        # Text chunks in the order they were reported.
        self.lidata = []

    def handle_starttag(self, tag, attrs):
        """Remember ``attrs`` under ``tag``, replacing any earlier entry."""
        self.dic[tag] = attrs

    def handle_data(self, data):
        """Append the text chunk ``data`` to ``lidata``."""
        self.lidata.append(data)

    def handle_endtag(self, tag):
        """Ignore end tags; nothing is recorded for them."""
        pass
                
# Directory that holds the HTML file to parse.
mydir="c://html//"

# Read the whole document. The with-statement closes the file even when
# read() raises, which the original open/read/close sequence did not.
with open(mydir+"1.htm") as f:
        in_data=f.read()

# Run the document through the collecting parser.
my = MyHtmlparser()
my.feed(in_data)


# Print every text chunk gathered by handle_data, in the order received.
for i in my.lidata:
        print(i)

# Print each distinct start-tag name recorded by handle_starttag.
for i in my.dic:
        print(i)

History