"""Simple XML marshaling (serializing) and unmarshaling (de-serializing)
module using Python dictionaries and the marshal module.

An XML document is represented as a nested dictionary of the form::

    {element_name: [attributes_dict, text, [child_dict, ...]]}

where ``text`` is the zlib-compressed, UTF-8 encoded character data of the
element and every ``child_dict`` has the same single-key shape.
"""

from xml.sax.handler import ContentHandler
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import XMLReader
from xml.sax import make_parser
import marshal
import os
import sys
import zlib

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3


class BinXMLSAXParserException(Exception):
    """Raised when BinXMLSAXParser is given an unusable input source."""


class XMLDictionaryHandler(ContentHandler):
    """SAX handler which converts an XML document into a Python dictionary.

    After parsing, the result is available as the ``d`` attribute.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.d = {}
        # Stack of (entry, text_chunks) pairs for the elements currently
        # open; ``entry`` is the element's [attrs, text, children] list.
        self._open = []

    def startDocument(self):
        """Reset the state so one handler can parse several documents."""
        self.d = {}
        self._open = []

    def startElement(self, name, attrs):
        """Create the entry for *name* and attach it to its parent."""
        entry = [dict(attrs.items()), b'', []]
        if self._open:
            parent_entry = self._open[-1][0]
            parent_entry[2].append({name: entry})
        else:
            self.d[name] = entry
        self._open.append((entry, []))

    def endElement(self, name):
        """Close the innermost open element and store its text."""
        entry, chunks = self._open.pop()
        # The parser may deliver one text run in several chunks (around
        # entities, newlines, buffer boundaries), so whitespace is only
        # stripped once the complete text of the element is known.
        entry[1] = ''.join(chunks).encode('utf-8').strip()

    def characters(self, content):
        """Collect character data for the element that is currently open."""
        if self._open:
            self._open[-1][1].append(content)

    def endDocument(self):
        """Compress all text items once the document is complete."""
        self.packtext(self.d)

    def packtext(self, map):
        """Recursively zlib-compress the text slot of every entry in *map*."""
        for value in map.values():
            value[1] = zlib.compress(value[1])
            for submap in value[2]:
                self.packtext(submap)


class BinXMLSAXParser(XMLReader):
    """A parser for Python binary marshal files representing XML
    information, exposed through the SAX interfaces.
    """

    def __init__(self):
        XMLReader.__init__(self)
        self.depth = 0

    def parse(self, stream):
        """Load the marshalled dictionary and fire SAX events for it.

        *stream* is either a binary file object or the path of an existing
        file. A failure while reading the marshal data terminates the
        program through ``sys.exit``; any other kind of source raises
        BinXMLSAXParserException.
        """
        # NOTE(review): the marshal module is not safe against erroneous or
        # maliciously constructed data - only load files from trusted sources.
        if hasattr(stream, 'read'):
            try:
                self.d = marshal.load(stream)
            except Exception as e:
                sys.exit(e)
        elif os.path.exists(stream):
            try:
                with open(stream, 'rb') as source:
                    self.d = marshal.load(source)
            except Exception as e:
                sys.exit(e)
        else:
            raise BinXMLSAXParserException(
                'BinXMLSAXParserException: Invalid Input Source')

        self._cont_handler.startDocument()
        self.__parse(self.d)
        self._cont_handler.endDocument()

    def __parse(self, node):
        """Recursively fire SAX events for the XML dictionary *node*."""
        handler = self._cont_handler
        for key, value in node.items():
            attrs, packed_text, children = value[0], value[1], value[2]

            # For pretty printing
            handler.ignorableWhitespace(" " * self.depth)
            handler.startElement(key, attrs)
            # The text was stored as compressed UTF-8 bytes; hand the
            # content handler decoded text so it is not misread as Latin-1.
            handler.characters(zlib.decompress(packed_text).decode('utf-8'))

            # Nested elements are emitted one indentation level deeper.
            self.depth += 1
            handler.ignorableWhitespace('\n')
            for child in children:
                self.__parse(child)
            self.depth -= 1

            handler.ignorableWhitespace(" " * self.depth)
            handler.endElement(key)
            handler.ignorableWhitespace('\n')


def _xml_to_dict(xmlfile):
    """Parse the XML file at path *xmlfile* and return its dictionary form."""
    parser = make_parser()
    handler = XMLDictionaryHandler()
    parser.setContentHandler(handler)
    # Binary mode lets the XML parser honour the document's own encoding.
    with open(xmlfile, 'rb') as source:
        parser.parse(source)
    return handler.d


class XMLMarshal(object):
    """The XML marshalling class.

    All methods terminate the program through ``sys.exit`` when an error
    occurs.
    """

    @staticmethod
    def dump(stream, xmlfile):
        """Serialize the XML file *xmlfile* to the binary file *stream*."""
        try:
            marshal.dump(_xml_to_dict(xmlfile), stream)
        except Exception as e:
            sys.exit(e)

    @staticmethod
    def dumps(stream, xmlfile):
        """Serialize the XML file *xmlfile* and return the marshalled bytes.

        *stream* is not used; it is kept so existing callers keep working.
        """
        try:
            return marshal.dumps(_xml_to_dict(xmlfile))
        except Exception as e:
            sys.exit(e)

    @staticmethod
    def load(stream, out=sys.stdout):
        """Load an XML binary stream and send XML text to the output
        stream 'out'.
        """
        try:
            p = BinXMLSAXParser()
            p.setContentHandler(XMLGenerator(out))
            p.parse(stream)
        except Exception as e:
            sys.exit(e)

    @staticmethod
    def loads(stream):
        """Load an XML binary stream and return the XML text as a string."""
        c = StringIO()
        try:
            p = BinXMLSAXParser()
            p.setContentHandler(XMLGenerator(c))
            p.parse(stream)
        except Exception as e:
            sys.exit(e)

        return c.getvalue()


if __name__ == '__main__':
    fname = 'sample.xml'
    binname = os.path.splitext(fname)[0] + '.bin'
    # Dump XML text to binary
    with open(binname, 'wb') as binfile:
        XMLMarshal.dump(binfile, fname)
    # Dump XML binary to text (this overwrites the original XML file)
    with open(binname, 'rb') as binfile:
        with open('sample.xml', 'w') as xmlout:
            XMLMarshal.load(binfile, xmlout)