Welcome, guest | Sign In | My Account | Store | Cart
"""Simple XML marshaling (serializing) and
  unmarshaling(de-serializing) module using Python
  dictionaries and the marshal module.
"""

from xml.sax.handler import ContentHandler
from xml.sax.saxutils import XMLGenerator
from xml.sax.xmlreader import XMLReader
from xml.sax import make_parser
import marshal
import os,sys,zlib

class XMLDictionaryHandler(ContentHandler):
    """SAX Handler class which converts an XML
    file to a corresponding Python dictionary """

    def __init__(self):
        self.curr=''
        self.parent=''
        self.count=0
        self.d = {}
        self.currd = {}
        self.parentd = {}
        self.stack = []
        self.stack2 = []

    def startElement(self, name, attrs):
        """ Start element handler """

        if self.count==0:
            self.parent=name
            self.d[name] = [dict(attrs),
                            '',
                            []]
            self.currd = self.d
        else:
            chld={name: [dict(attrs),
                         '',
                         [] ]}
            self.parent = self.stack[-1]
            self.parentd = self.stack2[-1]

            chldlist = (self.parentd[self.parent])[2]
            chldlist.append(chld)
            self.currd = chld

        self.stack.append(name)
        self.stack2.append(self.currd)

        self.curr=name
        self.count += 1

    def endElement(self, name):
        """ End element handler """

        self.stack.remove(name)
        for item in self.stack2:
            if item.has_key(name):
                self.stack2.remove(item)

    def characters(self, content):
        """ Character handler """

        content = (content.encode('utf-8')).strip()

        if content:
            myd=((self.parentd[self.parent])[2])[-1]
            currcontent = (myd[self.curr])[1]
            (myd[self.curr])[1] = "".join((currcontent, content))

    def endDocument(self):
        """ End document handler """
        
        # Compress all text items
        self.packtext(self.d)
        
    def packtext(self, map):
    
        for key, value in map.items():
            text = value[1]
            value[1] = zlib.compress(text)
            children = value[2]
            for submap in children:
                self.packtext(submap)
        
class BinXMLSAXParser(XMLReader):
    """A parser for Python binary marshal files representing
    XML information using SAX interfaces """

    def __init__(self):
        XMLReader.__init__(self)
        self.depth = 0

    def parse(self, stream):
        """ Parse Method """

        # Check if it is a file object
        if type(stream) is file:
            try:
                self.d = marshal.load(stream)
            except Exception, e:
                sys.exit(e)

        # Check if it is a file path
        elif os.path.exists(stream):
            try:
                self.d = marshal.load(open(stream,'rb'))
            except Exception, e:
                sys.exit(e)
        else:
            raise 'BinXMLSAXParserException: Invalid Input Source'

        self._cont_handler.startDocument()
        self.__parse(self.d)
        self._cont_handler.endDocument()

    def __parse(self, map):
        """ Recursive parse method for
        XML dictionary """

        for key, value in map.items():
            # For pretty printing
            self._cont_handler.ignorableWhitespace(" "*self.depth)
            attrs = value[0]
            text = value[1]
            children = value[2]
            # Fire startElement handler event for key
            self._cont_handler.startElement(key, attrs)
            # Fire character handler event for value
            self._cont_handler.characters(zlib.decompress(text))
            # Nested element, recursively call
            # this function...
            self.depth += 1
            # For pretty printing
            self._cont_handler.ignorableWhitespace('\n')
            for child in children:
                self.__parse(child)
            self.depth -= 1
            # For pretty printing
            self._cont_handler.ignorableWhitespace(" "*self.depth)
            # Fire end element handler event
            self._cont_handler.endElement(key)
            # For pretty printing
            self._cont_handler.ignorableWhitespace('\n')

class XMLMarshal(object):
    """ The XML marshalling class """

    def dump(stream, xmlfile):
        """ Serialize XML data to a file """

        try:
            p=make_parser()
            h = XMLDictionaryHandler()
            p.setContentHandler(h)
            p.parse(open(xmlfile))
            # print h.d
            marshal.dump(h.d, stream)
        except Exception, e:
            sys.exit(e)

    def dumps(stream, xmlfile):
        """ Serialize XML data to a string """

        try:
            p=make_parser()
            p.setContentHandler()
            h = XMLDictionaryHandler()
            p.parse(open(xmlfile))
            return marshal.dumps(h.d, stream)
        except Exception, e:
            sys.exit(e)

        return None

    def load(stream, out=sys.stdout):
        """ Load an XML binary stream
        and send XML text to the output
        stream 'out' """

        try:
            p=BinXMLSAXParser()
            p.setContentHandler(XMLGenerator(out))
            p.parse(stream)
        except Exception, e:
            sys.exit(e)

    def loads(stream):
        """ Load an XML binary stream
        and return XML text as string """

        import cStringIO
        c=cStringIO.StringIO()

        try:
            p=BinXMLSAXParser()
            p.setContentHandler(XMLGenerator(c))
            p.parse(stream)
        except Exception, e:
            sys.exit(e)

        return c.getvalue()

    dump=staticmethod(dump)
    dumps=staticmethod(dumps)
    load=staticmethod(load)
    loads=staticmethod(loads)

if __name__ == '__main__':

    fname = 'sample.xml'
    binname = os.path.splitext(fname)[0] + '.bin'

    # Dump XML text to binary
    XMLMarshal.dump(open(binname,'wb'), fname)
    # Dump XML binary to text
    XMLMarshal.load(open(binname,'rb'), open('sample.xml','w'))

History

  • revision 2 (16 years ago)
  • previous revisions are not available