Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python

import sgmllib, string, urllib

class DiaryParser(sgmllib.SGMLParser):
   
   
def __init__(self):
        sgmllib
.SGMLParser.__init__(self)
       
self.entries = []
       
self.dates = []
       
self.inHtml = 0
       
self.inDate = 0
       
self.data = ""
       
   
def handle_data(self, data):
       
self.data = self.data + data
   
   
def unknown_starttag(self, tag, attrs):
       
pass
               
   
def unknown_endtag(self, tag):
       
pass

   
def start_html(self, attributes):
       
self.inHtml = 1
       
self.data = ""
       
self.setliteral()
   
   
def end_html(self):
       
self.entries.append(self.data)
       
self.inHtml = 0
   
   
def start_date(self, attributes):
       
self.data = ""
       
self.setliteral()
   
   
def end_html(self):
       
self.entries.append(self.data)
       
self.inHtml = 0
   
   
def start_date(self, attributes):
       
self.data = ""
       
self.inDate = 1
   
   
def end_date(self):
       
self.dates.append(self.data)
       
self.inDate = 0
       

def getEntries(person):
    """Fetch an Advogato member's diary and return a dictionary in the form
        { date : entry, ... }

    ``person`` is the account name; it is URL-quoted before being placed
    in the request path.  The response is streamed to a DiaryParser in
    8 KiB chunks, and the connection is closed even if reading or parsing
    raises.
    """
    parser = DiaryParser()
    f = urllib.urlopen("http://www.advogato.org/person/%s/diary.xml" % urllib.quote(person))
    try:
        s = f.read(8192)
        while s:
            parser.feed(s)
            s = f.read(8192)
    finally:
        # Always release the network handle, including on errors.
        f.close()
    parser.close()

    # map(None, a, b) pairs items positionally and pads the shorter list
    # with None, so a date without an entry (or vice versa) is still kept.
    return dict(map(None, parser.dates, parser.entries))


if __name__ == '__main__':
    import sys

    # The account name is required; fail with a usage hint rather than an
    # IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <person>\n" % sys.argv[0])
        sys.exit(2)

    # Parenthesised single-argument print behaves the same as the print
    # statement under Python 2.
    print(getEntries(sys.argv[1]))

History