Simple mapping of XML to python dictionary based on Perl XML::Simple
# Create python xml structures compatible with
# http://search.cpan.org/~grantm/XML-Simple-2.18/lib/XML/Simple.pm
from lxml import etree
from itertools import groupby
def xml2d(e):
    """Convert an etree into a dict structure

    Each element becomes a dict in which:
      - every XML attribute is stored under its own name,
      - non-empty element text is stored under '__text__',
      - non-empty tail text is stored under '__tail__',
      - child elements are stored under their tag as a list of dicts,
        one per child, in document order.

    Children sharing a tag are collected into the same list even when
    other elements sit between them (e.g. <a/><b/><a/>).  If a child tag
    has the same name as an attribute (or as one of the reserved keys),
    the list of children replaces that entry.

    @type  e: etree.Element
    @param e: the root of the tree
    @return: The dictionary representation of the XML tree, as a
             single-entry dict { root tag : root contents }
    """
    def _xml2d(node):
        kids = dict(node.attrib)
        if node.text:
            kids['__text__'] = node.text
        if node.tail:
            kids['__tail__'] = node.tail
        # Accumulate children per tag in a separate dict.  Grouping only
        # *consecutive* siblings (as itertools.groupby does) would let a
        # later run of the same tag overwrite an earlier one.
        by_tag = {}
        for child in node:
            by_tag.setdefault(child.tag, []).append(_xml2d(child))
        # Child lists take precedence over same-named attributes.
        kids.update(by_tag)
        return kids

    return {e.tag: _xml2d(e)}
def d2xml(d):
    """Convert a dict to xml

    1. The top level d must contain a single entry i.e. the root element
       (only the first entry is used)
    2. Keys of the dictionary become subelements or attributes
    3. If a value is a simple string, then the key is an attribute
    4. If a value is a dict, then the key is a subelement
    5. If a value is a list (of dicts), then the key is a set of
       subelements
    6. The reserved keys '__text__' and '__tail__' set the text and the
       tail of the element instead of creating an attribute

    Example:

        a = { 'module' : {'tag' : [ { 'name': 'a', 'value': 'b'},
                                    { 'name': 'c', 'value': 'd'},
                                  ],
                          'gobject' : { 'name': 'g', 'type':'xx' },
                          'uri' : 'test',
                         }
            }

    d2xml(a) returns an element which serializes to something like
    (ordering of attributes and subelements follows the dict ordering):

        <module uri="test">
          <gobject type="xx" name="g"/>
          <tag name="a" value="b"/>
          <tag name="c" value="d"/>
        </module>

    @type  d: dict
    @param d: A dictionary formatted as an XML document
    @return: A etree Root element
    @raise IndexError: if d is empty
    """
    def _d2xml(content, parent):
        for key, value in content.items():
            if isinstance(value, dict):
                # A nested dict is a single subelement.
                _d2xml(value, etree.SubElement(parent, key))
            elif isinstance(value, list):
                # A list is a run of subelements sharing the same tag.
                for item in value:
                    _d2xml(item, etree.SubElement(parent, key))
            elif key == "__text__":
                parent.text = value
            elif key == "__tail__":
                parent.tail = value
            else:
                parent.set(key, value)

    # dict.items() is a non-indexable view on Python 3, so materialize it
    # before taking the first (root) entry.
    tag, content = list(d.items())[0]
    root = etree.Element(tag)
    _d2xml(content, root)
    return root
if __name__ == "__main__":
    # Round-trip self-test: XML text -> dict -> element -> XML text must
    # reproduce the input exactly.
    X = """<T uri="boo"><a n="1"/><a n="2"/><b n="3"><c x="y"/></b></T>"""
    print(X)
    Y = xml2d(etree.XML(X))
    print(Y)
    # tostring() returns bytes on Python 3; decode so the result can be
    # compared with (and printed like) the original text.
    Z = etree.tostring(d2xml(Y)).decode("utf-8")
    print(Z)
    assert X == Z
|
There are many ways to map XML to dicts. This one is inspired by Perl's XML::Simple way of mapping, which is pretty smart about both element tags <A> and attributes <A a="1" />
See the examples here: http://search.cpan.org/~grantm/XML-Simple-2.18/lib/XML/Simple.pm
Doesn't handle text only nodes:
causes error, but
doesn't.
You're right: text and tail were ignored because my application did not need them, but here is a corrected version in which they are handled.