XML is a wonderful buzzword, so clients often like to have exports of data in that format. But as a programmer you may not like to fiddle around with various XML parsers. Here is a very easy solution that doesn't offer all the capabilities of XML, but offers enough to create valid XML output and read it back later.
from xml.dom.minidom import Document, parse, parseString
from types import StringType, UnicodeType
import string
enc = "iso-8859-1"
def _encode(v):
    """Return *v* as a byte string in the module encoding ``enc``.

    Unicode objects are encoded with ``enc``; every other value (byte
    strings, None, numbers, ...) is handed back unchanged.
    """
    if not isinstance(v, UnicodeType):
        return v
    return v.encode(enc)
class XMLElement:
    """Thin wrapper around a minidom node and the document that owns it.

    Attributes are read and written with mapping syntax (``el["name"]``);
    child nodes are created with ``add``, ``addText`` and ``addComment``.
    Unicode values are passed through ``_encode`` on the way in and on the
    way out, so callers work with byte strings in the module encoding.
    """

    def __init__(self, doc, el):
        """Wrap node *el*; *doc* is the minidom document used to create nodes."""
        self.doc = doc
        self.el = el

    def __getitem__(self, name):
        """Return the encoded value of attribute *name*, or None if absent."""
        return self.get(name)

    def __setitem__(self, name, value):
        """Set attribute *name* to the encoded *value*."""
        self.el.setAttribute(name, _encode(value))

    def __delitem__(self, name):
        """Remove attribute *name* from the wrapped node."""
        self.el.removeAttribute(name)

    def __str__(self):
        """Return the pretty-printed XML of the whole document.

        Note that this serializes ``self.doc``, not only the wrapped node.
        """
        return _encode(self.doc.toprettyxml())

    def toString(self):
        """Return the compact XML of the whole document (``self.doc``)."""
        return _encode(self.doc.toxml())

    def _inst(self, el):
        """Wrap minidom node *el* in an XMLElement sharing this document."""
        return XMLElement(self.doc, el)

    def get(self, name, default=None):
        """Return the encoded value of attribute *name*, else encoded *default*."""
        attr = self.el.getAttributeNode(name)
        if attr is None:
            return _encode(default)
        return _encode(attr.value)

    def add(self, tag, **kwargs):
        """Append a new child element *tag* and return it wrapped.

        Each keyword argument becomes an attribute of the new element.
        Non-string values are converted with ``str()``.
        """
        child = self.doc.createElement(tag)
        for key, value in kwargs.items():
            # str() on a unicode object containing non-ASCII characters
            # raises UnicodeEncodeError, so only stringify values that are
            # not strings already and let _encode deal with unicode.
            if not isinstance(value, (StringType, UnicodeType)):
                value = str(value)
            child.setAttribute(key, _encode(value))
        return self._inst(self.el.appendChild(child))

    def addText(self, data):
        """Append a text node holding the encoded *data*; return it wrapped."""
        text_node = self.doc.createTextNode(_encode(data))
        return self._inst(self.el.appendChild(text_node))

    def addComment(self, data):
        """Append a comment node holding the encoded *data*; return it wrapped."""
        # Encoded like addText so text and comments end up as the same
        # string type inside the DOM.
        comment_node = self.doc.createComment(_encode(data))
        return self._inst(self.el.appendChild(comment_node))

    def getText(self, sep=" "):
        """Join the data of all direct text-node children with *sep*."""
        parts = [node.data for node in self.el.childNodes
                 if node.nodeType == node.TEXT_NODE]
        return _encode(sep.join(parts))

    def getAll(self, tag):
        """Return a list of wrapped elements named *tag* below this node."""
        return [self._inst(el) for el in self.el.getElementsByTagName(tag)]
class _Document(Document):
    """minidom ``Document`` whose XML declaration always names an encoding."""

    def writexml(self, writer, indent="", addindent="", newl="",
                 encoding=None, standalone=None):
        """Write the XML declaration followed by all child nodes to *writer*.

        *indent*, *addindent* and *newl* are forwarded unchanged to every
        child node's ``writexml``.  *encoding* is the name put into the XML
        declaration; when it is None the module default ``enc`` is used.
        *standalone* is accepted only for signature compatibility with
        minidom releases that pass it, and is ignored.

        The extra parameters are required because ``toprettyxml()`` in
        Python 2.3 and later calls ``writexml`` with an additional
        ``encoding`` argument; without them that call raises TypeError.
        """
        if encoding is None:
            encoding = enc
        writer.write('<?xml version="1.0" encoding="%s" ?>\n' % encoding)
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)
class XMLDocument(XMLElement):
    """An XMLElement that owns its document and can load one from XML."""

    def __init__(self, tag=None, **kwargs):
        """Create an empty document, optionally with a root element.

        When *tag* is given, a root element of that name is added with
        *kwargs* as its attributes, and this object then wraps that root
        element instead of the document node.
        """
        document = _Document()
        XMLElement.__init__(self, document, document)
        if not tag:
            return
        root = self.add(tag, **kwargs)
        self.el = root.el

    def parse(self, d):
        """Replace the document with the result of minidom ``parse(d)``.

        Returns this object so calls can be chained.
        """
        tree = parse(d)
        self.doc = tree
        self.el = tree
        return self

    def parseString(self, d):
        """Replace the document with the one parsed from the string *d*.

        *d* is passed through ``_encode`` before parsing.  Returns this
        object so calls can be chained.
        """
        tree = parseString(_encode(d))
        self.doc = tree
        self.el = tree
        return self
if __name__ == "__main__":
    # Single-argument print(...) with %-formatting produces the same output
    # as the print statement under Python 2.

    # Example of dumping a database structure
    doc = XMLDocument("database", name="testdb")
    table = doc.add("table", name="test")
    table.add("field", name="counter", type="int")
    table.add("field", name="name", type="varchar")
    table.add("field", name="info", type="text")
    print(doc)

    # Simulate reading an XML file
    ndoc = XMLDocument()
    ndoc.parseString(str(doc))
    root = ndoc.getAll("database")
    if root:
        db = root[0]
        print("Database: %s" % db["name"])
        for table in db.getAll("table"):
            print(" Table: %s" % table["name"])
            # List the fields of this table only; asking `db` here would
            # repeat every field of the database under each table.
            for field in table.getAll("field"):
                print(" Field: %s - Type: %s" % (field["name"], field["type"]))

    # It's object oriented
    print(XMLDocument("notice").add("text", format="plain").addText("Some text"))
|
The example shows how to dump a database structure to XML and then parse it again. The central class is XMLElement; even XMLDocument derives from it. This module also handles the encoding in quite an easy way, which is useful if you don't like to work with unicode data.
All in all, this is just an easy, object-oriented, Pythonic way to build XML documents and parse them later, and then you can say: Yes, my software is capable of XML exports and imports ;-)
We use this software in our DADO Application Server (http://www.spirito.de)
Broken in Python 2.3.2. This example generates the following traceback in Python 2.3.2:
Traceback (most recent call last):
File "C:\MyPython\XMLexample.py", line 102, in -toplevel-
File "C:\MyPython\XMLexample.py", line 31, in __str__
File "C:\Python23\lib\xml\dom\minidom.py", line 59, in toprettyxml
TypeError: writexml() takes at most 5 arguments (6 given)
Re: Broken in Python 2.3.2. xml.dom.minidom.Document API changed between 2.2 and 2.3. There are two fixes:
1) add encoding argument to _Document.writexml in the recipe, or
2) don't use _Document at all because it is no longer needed, toxml() takes optional encoding argument in Python >= 2.3
Correction of lightweight XML parser and reader. The easiest way to solve the problem is as follows:
old code :
class _Document(Document):
new code :
class _Document(Document):
Fix doesn't work with 2.3.4. This fix doesn't work with 2.3.4:
Traceback (most recent call last):
  File "xml_inout.py", line 101, in ?
    print doc
  File "xml_inout.py", line 31, in __str__
    return _encode(self.doc.toxml())
  File "C:\python23\lib\xml\dom\minidom.py", line 48, in toxml
    return self.toprettyxml("", "", encoding)
  File "C:\python23\lib\xml\dom\minidom.py", line 60, in toprettyxml
    self.writexml(writer, "", indent, newl, encoding)
TypeError: writexml() takes at most 5 arguments (6 given)
Aha! I finally understood one of the previous comments. Forgive me, I'm still learning Python. "Don't use _Document()" So I changed: self.doc = _Document() to: self.doc = Document()
and now it works correctly on 2.3.4!
Version checking? Does this fix break the module on Python 2.2?
If it does, would a simple version checking 'if' statement do the trick?
i.e....
# Compare version numbers by value: `is` tests object identity and is not a
# reliable way to compare integers.  The class is spelled `_Document`.
if sys.version_info[:2] == (2, 2):
    self.doc = _Document()
elif sys.version_info[0] == 2 and sys.version_info[1] > 2:
    self.doc = Document()