Lightweight XML constructor and reader « Python recipes

XML is a wonderful buzzword, so clients often like to have data exported in that format. But as a programmer you may not like fiddling around with various XML parsers. Here is a very easy solution that doesn't offer all the capabilities of XML, but offers enough to create valid XML output and read it back later.

      from xml.dom.minidom import Document, parse, parseString
from types import StringType, UnicodeType
import string

enc = "iso-8859-1"

def _encode(v):
    """Return *v* encoded with the module encoding ``enc`` if it is unicode.

    Values of any other type are handed back untouched.
    """
    if not isinstance(v, UnicodeType):
        return v
    return v.encode(enc)

class XMLElement:
    """Convenience wrapper around a single minidom node.

    Attributes of the wrapped node are read, written and removed with
    mapping syntax (``el["name"]``); children are appended with ``add``,
    ``addText`` and ``addComment``.  Unicode values passing through the
    wrapper are converted with ``_encode``.
    """

    def __init__(self, doc, el):
        # doc: owning minidom document (node factory, and what gets serialised)
        # el:  the node this wrapper operates on
        self.doc = doc
        self.el = el

    def __getitem__(self, name):
        """Return the value of attribute *name*, or None when it is not set."""
        a = self.el.getAttributeNode(name)
        if a:
            return _encode(a.value)
        return None

    def __setitem__(self, name, value):
        """Set attribute *name* to *value* (encoded if unicode)."""
        self.el.setAttribute(name, _encode(value))

    def __delitem__(self, name):
        """Remove attribute *name* from the wrapped node."""
        self.el.removeAttribute(name)

    def __str__(self):
        """Return the pretty-printed XML of the whole owning document."""
        return _encode(self.doc.toprettyxml())

    def toString(self):
        """Return the compact XML of the whole owning document."""
        return _encode(self.doc.toxml())

    def _inst(self, el):
        """Wrap *el* in a new XMLElement that shares this document."""
        return XMLElement(self.doc, el)

    def get(self, name, default=None):
        """Return the value of attribute *name*, or *default* when not set."""
        a = self.el.getAttributeNode(name)
        if a:
            return _encode(a.value)
        return _encode(default)

    def add(self, tag, **kwargs):
        """Append a child element *tag* and return it wrapped.

        Every keyword argument becomes an attribute of the new element.
        """
        el = self.doc.createElement(tag)
        for k, v in kwargs.items():
            # Encode before stringifying: calling str() on a unicode value
            # first would use the default codec and fail on non-ASCII text.
            # Non-string values (ints, ...) pass through _encode unchanged
            # and are then converted by str().
            el.setAttribute(k, str(_encode(v)))
        return self._inst(self.el.appendChild(el))

    def addText(self, data):
        """Append a text node holding *data* and return it wrapped."""
        return self._inst(
            self.el.appendChild(
                self.doc.createTextNode(_encode(data))))

    def addComment(self, data):
        """Append a comment node holding *data* and return it wrapped."""
        # Encoded like text nodes so both kinds of content are stored the
        # same way inside one document.
        return self._inst(
            self.el.appendChild(
                self.doc.createComment(_encode(data))))

    def getText(self, sep=" "):
        """Return the data of the direct text children joined with *sep*."""
        rc = [node.data
              for node in self.el.childNodes
              if node.nodeType == node.TEXT_NODE]
        return _encode(sep.join(rc))

    def getAll(self, tag):
        """Return a list of wrappers for the elements named *tag*.

        The lookup uses ``getElementsByTagName`` on the wrapped node.
        """
        return [self._inst(node)
                for node in self.el.getElementsByTagName(tag)]

class _Document(Document):

    def writexml(self, writer, indent="", addindent="", newl=""):
        writer.write('<?xml version="1.0" encoding="%s" ?>\n' % enc)
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)

class XMLDocument(XMLElement):
    """XMLElement that owns a whole document.

    A fresh instance wraps an empty document; if *tag* is given, a root
    element with that name (and the keyword arguments as attributes) is
    created and becomes the wrapped node.
    """

    def __init__(self, tag=None, **kwargs):
        document = _Document()
        # Until a root element exists, the document itself is the wrapped node.
        XMLElement.__init__(self, document, document)
        if not tag:
            return
        root = self.add(tag, **kwargs)
        self.el = root.el

    def parse(self, d):
        """Replace the content with the result of minidom ``parse(d)``.

        Returns self so calls can be chained.
        """
        tree = parse(d)
        self.doc = tree
        self.el = tree
        return self

    def parseString(self, d):
        """Replace the content with the document parsed from the string *d*.

        Returns self so calls can be chained.
        """
        tree = parseString(_encode(d))
        self.doc = tree
        self.el = tree
        return self

if __name__=="__main__":
    # Demo: build a document, print it, parse it back and walk the result.
    # Every print takes one pre-formatted string, so the statement form and
    # the function form of print produce the same output.

    # Example of dumping a database structure
    doc = XMLDocument("database", name="testdb")
    table = doc.add("table", name="test")
    table.add("field", name="counter", type="int")
    table.add("field", name="name", type="varchar")
    table.add("field", name="info", type="text")
    print(doc)

    # Simulate reading a XML file
    ndoc = XMLDocument()
    ndoc.parseString(str(doc))
    root = ndoc.getAll("database")
    if root:
        db = root[0]
        print("Database: %s" % db["name"])
        for table in db.getAll("table"):
            print("  Table: %s" % table["name"])
            # Query the current table, not the database, so each table
            # lists only its own fields.
            for field in table.getAll("field"):
                print("    Field: %s - Type: %s"
                      % (field["name"], field["type"]))

    # It's object oriented
    print(XMLDocument("notice").add("text",format="plain").addText("Some text"))

      

The example shows how to dump a database structure as XML and then parse it again. The central class is XMLElement; even XMLDocument derives from it. This module also handles the encoding in quite an easy way, which is useful if you don't like to work with unicode data.

All in all, this is just an easy, object-oriented, pythonic way to build XML documents and parse them later, and then you can say: Yes, my software is capable of XML exports and imports ;-)

We use this software in our DADO Application Server (http://www.spirito.de)

Tags: xml

6 comments

Mark Brown 20 years, 4 months ago # | flag

Broken in Python 2.3.2. This example generates the following traceback in Python 2.3.2:

Traceback (most recent call last):

File "C:\MyPython\XMLexample.py", line 102, in -toplevel-

print doc

File "C:\MyPython\XMLexample.py", line 31, in __str__

return _encode(self.doc.toprettyxml())

File "C:\Python23\lib\xml\dom\minidom.py", line 59, in toprettyxml

self.writexml(writer, "", indent, newl, encoding)

TypeError: writexml() takes at most 5 arguments (6 given)

Vaclav Slavik 20 years, 3 months ago # | flag

Re: Broken in Python 2.3.2. xml.dom.minidom.Document API changed between 2.2 and 2.3. There are two fixes:

1) add encoding argument to _Document.writexml in the recipe, or

2) don't use _Document at all because it is no longer needed, toxml() takes optional encoding argument in Python >= 2.3

sasa sasa 19 years, 9 months ago # | flag

correction of lightweight XML parser and reader. the easiest way to solve the problem is as below:

old code :

class _Document(Document):

def writexml(self, writer, indent="", addindent="", newl=""):
    writer.write('<?xml version="1.0" encoding="%s" ?>\n' % enc)
    for node in self.childNodes:
        node.writexml(writer, indent, addindent, newl)

new code :

class _Document(Document):

def writexml(self, writer, indent="", addindent="", newl=""):
    writer.write('<?xml version="1.0" encoding="%s" ?>\n' % enc)
    for node in self.childNodes:
        node.writexml(writer, indent, addindent, newl, _encode)

Don Smith 19 years, 4 months ago # | flag

Fix doesn't work with 2.3.4. This fix doesn't work with 2.3.4: Traceback (most recent call last): File "xml_inout.py", line 101, in ? print doc File "xml_inout.py", line 31, in __str__ return _encode(self.doc.toxml()) File "C:\python23\lib\xml\dom\minidom.py", line 48, in toxml return self.toprettyxml("", "", encoding) File "C:\python23\lib\xml\dom\minidom.py", line 60, in toprettyxml self.writexml(writer, "", indent, newl, encoding) TypeError: writexml() takes at most 5 arguments (6 given)

Don Smith 19 years, 4 months ago # | flag

Aha! I finally understood one of the previous comments. Forgive me, I'm still learning Python. "Don't use _Document()" So I changed: self.doc = _Document() to: self.doc = Document()

and now it works correctly on 2.3.4!

Rob Cowie 18 years, 7 months ago # | flag

Version checking? Does this fix break the module on Python 2.2?

If it does, would a simple version checking 'if' statement do the trick?

i.e....

if (sys.version_info[0] is 2) and (sys.version_info[1] is 2):

self.doc = _Document()

elif (sys.version_info[0] is 2) and (sys.version_info[1] > 2):

self.doc = Document()

◄	Python recipes (4591)	►
◄	Dirk Holtwick's recipes (15)	►

Lightweight XML constructor and reader (Python recipe) by Dirk Holtwick
ActiveState Code (http://code.activestate.com/recipes/157358/)

6 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Lightweight XML constructor and reader (Python recipe) by Dirk Holtwick ActiveState Code (http://code.activestate.com/recipes/157358/)

6 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Lightweight XML constructor and reader (Python recipe) by Dirk Holtwick
ActiveState Code (http://code.activestate.com/recipes/157358/)