Welcome, guest | Sign In | My Account | Store | Cart

Simple mapping of XML to a Python dictionary, based on Perl's XML::Simple

Python, 85 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Create python xml structures compatible with
# http://search.cpan.org/~grantm/XML-Simple-2.18/lib/XML/Simple.pm

from lxml import etree
from itertools import groupby

def xml2d(e):
    """Convert an etree element into a nested dict structure.

    Each element becomes a dict in which:
      - every XML attribute is stored under its own name,
      - non-empty text is stored under '__text__',
      - non-empty tail text is stored under '__tail__',
      - every child tag maps to a list with one dict per child of that
        tag, in document order.

    Children that share a tag are collected into the same list even when
    other elements sit between them.  The relative order of children
    with *different* tags is not recorded.  If a child tag has the same
    name as an attribute, the child list replaces the attribute value.

    @type  e: etree.Element
    @param e: the root of the tree
    @return: The dictionary representation of the XML tree, as a
             single-entry dict keyed by the root tag
    """
    def _xml2d(e):
        kids = dict(e.attrib)
        if e.text:
            kids['__text__'] = e.text
        if e.tail:
            kids['__tail__'] = e.tail
        # Accumulate per tag in a separate dict: grouping only adjacent
        # siblings would let a later run of the same tag overwrite an
        # earlier one (e.g. <a/><b/><a/> would lose the first <a/>).
        children = {}
        for child in e:
            children.setdefault(child.tag, []).append(_xml2d(child))
        # Child lists win over same-named attributes.
        kids.update(children)
        return kids
    return {e.tag: _xml2d(e)}


def d2xml(d):
    """Convert a dict to an XML element tree.

    Mapping rules:
       1. The top level d must contain a single entry, i.e. the root element.
       2. Keys of the dictionary become subelements or attributes.
       3. If a value is a dict, the key is a single subelement.
       4. If a value is a list, the key is a set of subelements, one per
          list item (each item is itself a dict).
       5. The keys '__text__' and '__tail__' set the element's text and
          tail respectively.
       6. Any other value is set as an attribute named by the key.

    Subelements are appended in dictionary iteration order.

    Example::

       a = { 'module' : {'tag' : [ { 'name': 'a', 'value': 'b'},
                                   { 'name': 'c', 'value': 'd'},
                                 ],
                         'gobject' : { 'name': 'g', 'type':'xx' },
                         'uri' : 'test',
                        }
           }

    Serializing d2xml(a) gives a document of this shape (element and
    attribute order follow dictionary order):

       <module uri="test">
          <gobject type="xx" name="g"/>
          <tag name="a" value="b"/>
          <tag name="c" value="d"/>
       </module>

    @type  d: dict
    @param d: A dictionary formatted as an XML document
    @return:  An etree root element
    @raise ValueError: if d does not contain exactly one top-level entry
    """
    def _d2xml(d, p):
        for k, v in d.items():
            if isinstance(v, dict):
                _d2xml(v, etree.SubElement(p, k))
            elif isinstance(v, list):
                for item in v:
                    _d2xml(item, etree.SubElement(p, k))
            elif k == "__text__":
                p.text = v
            elif k == "__tail__":
                p.tail = v
            else:
                p.set(k, v)

    if len(d) != 1:
        raise ValueError(
            "d2xml expects exactly one top-level entry (the root element), "
            "got %d" % len(d))
    # dict.items() is a non-indexable view on Python 3, so take the
    # single entry through an iterator instead of items()[0].
    k, v = next(iter(d.items()))
    node = etree.Element(k)
    _d2xml(v, node)
    return node
    
    

if __name__ == "__main__":
    # Round-trip self-check: parse a sample document into a dict, build
    # it back into XML and compare against the original text.
    X = """<T uri="boo"><a n="1"/><a n="2"/><b n="3"><c x="y"/></b></T>"""
    print(X)
    Y = xml2d(etree.XML(X))
    print(Y)
    # tostring() returns bytes on Python 3; decode so that the comparison
    # with the str literal X is text-to-text.
    Z = etree.tostring(d2xml(Y)).decode("utf-8")
    print(Z)
    assert X == Z

There are many ways to map XML to dicts. This one is inspired by Perl's XML::Simple mapping, which is pretty smart about handling both element tags <A> and attributes <A a="1" />.

See the examples here: http://search.cpan.org/~grantm/XML-Simple-2.18/lib/XML/Simple.pm

2 comments

Russ Gibson 9 years, 2 months ago  # | flag

Doesn't handle text only nodes:

<a>test</a>

causes error, but

<a foo="test"/>

doesn't.

kris kvilekval (author) 9 years, 1 month ago  # | flag

You're right: text and tail were ignored because my application did not need them, but here is a corrected version that handles them.

Created by kris kvilekval on Thu, 26 May 2011 (LGPL)
Python recipes (4591)
kris kvilekval's recipes (1)

Required Modules

Other Information and Tasks