This code converts an XML ElementTree.Element into a dict and back into XML. It attempts to retain as much information as possible during the conversions while keeping the resulting dict convenient to access.
from pprint import pprint
from xml.etree import ElementTree
# calling example
def main():
    """
    Calling example: load 'config.xml', read and modify a setting, and write
    the result to 'config.new.xml'.

    Expects 'config.xml' to contain a root 'settings' element with a 'color'
    child (or attribute).
    """
    configdict = ConvertXmlToDict('config.xml')
    pprint(configdict)

    # you can access the data as a dictionary
    # (single-argument print(...) behaves the same on Python 2 and Python 3)
    print(configdict['settings']['color'])
    configdict['settings']['color'] = 'red'

    # or you can access it like object attributes
    print(configdict.settings.color)
    configdict.settings.color = 'red'

    # convert back to an Element and serialize it
    root = ConvertDictToXml(configdict)
    tree = ElementTree.ElementTree(root)
    tree.write('config.new.xml')
# Module Code:
class _XmlDictAttributeError(AttributeError, KeyError):
    """
    Raised when attribute-style access names a key that is not present.

    It is an AttributeError so that hasattr(), getattr(obj, name, default) and
    copy.deepcopy() treat the attribute as missing, and it is also a KeyError
    so that existing callers which catch KeyError keep working.
    """


class XmlDictObject(dict):
    """
    Adds object like functionality to the standard dictionary.

    Keys can be read and written as attributes (obj.key) as well as items
    (obj['key']). Attribute reads fall back to the dictionary only when normal
    attribute lookup fails, so keys that collide with dict method names
    (e.g. 'items', 'keys') are reachable through item access only.
    """

    def __init__(self, initdict=None):
        """
        Create the object from an optional mapping or iterable of (key, value)
        pairs; None produces an empty object.
        """
        if initdict is None:
            initdict = {}
        dict.__init__(self, initdict)

    def __getattr__(self, item):
        """
        Return self[item]; raise an AttributeError (that is also a KeyError)
        when the key is missing.
        """
        try:
            return self[item]
        except KeyError:
            # A bare KeyError escaping from __getattr__ breaks the attribute
            # protocol: hasattr() and getattr() with a default only swallow
            # AttributeError.
            raise _XmlDictAttributeError(item)

    def __setattr__(self, item, value):
        """
        Store the attribute as a dictionary item.
        """
        self[item] = value

    def __str__(self):
        """
        Return the element text stored under '_text', or '' when there is none.
        """
        return self.get('_text', '')

    @staticmethod
    def Wrap(x):
        """
        Static method to wrap a dictionary recursively as an XmlDictObject

        Dictionaries become XmlDictObject instances, lists are rebuilt with
        their members wrapped, and any other value is returned unchanged.
        """
        if isinstance(x, dict):
            return XmlDictObject((k, XmlDictObject.Wrap(v)) for (k, v) in x.items())
        if isinstance(x, list):
            return [XmlDictObject.Wrap(v) for v in x]
        return x

    @staticmethod
    def _UnWrap(x):
        """
        Recursive worker for UnWrap: rebuild dictionaries as plain dict, walk
        lists, and return any other value unchanged.
        """
        if isinstance(x, dict):
            return dict((k, XmlDictObject._UnWrap(v)) for (k, v) in x.items())
        if isinstance(x, list):
            return [XmlDictObject._UnWrap(v) for v in x]
        return x

    def UnWrap(self):
        """
        Recursively converts an XmlDictObject to a standard dictionary and returns the result.
        """
        return XmlDictObject._UnWrap(self)
def _XmlTextFromValue(value):
    """
    Return value as element text: strings pass through untouched, anything
    else is converted with str().
    """
    try:
        text_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_types = str  # Python 3
    # Calling str() on a Python 2 unicode object raises UnicodeEncodeError for
    # non-ASCII text, so existing strings must not be re-converted.
    if isinstance(value, text_types):
        return value
    return str(value)


def _ConvertDictToXmlRecurse(parent, dictitem):
    """
    Populate the Element parent from dictitem.

    For a dictionary, the '_text' key sets the text of parent, a list value
    produces one child element per list member (all sharing the key as tag),
    and any other value produces a single child element. A non-dictionary
    dictitem becomes the text of parent.

    Raises TypeError when dictitem itself is a list, since a list can only be
    expanded under the tag of the dictionary key that holds it.
    """
    if isinstance(dictitem, list):
        # An explicit raise, unlike an assert, still runs under "python -O".
        raise TypeError('A list must be the value of a dictionary key, not a bare item')

    if not isinstance(dictitem, dict):
        parent.text = _XmlTextFromValue(dictitem)
        return

    for (tag, child) in dictitem.items():
        if str(tag) == '_text':
            parent.text = _XmlTextFromValue(child)
            continue
        # a list expands into repeated sibling elements, one per member
        members = child if isinstance(child, list) else [child]
        for member in members:
            elem = ElementTree.Element(tag)
            parent.append(elem)
            _ConvertDictToXmlRecurse(elem, member)
def ConvertDictToXml(xmldict):
    """
    Converts a dictionary to an XML ElementTree Element

    The first key of xmldict is used as the root tag and its value supplies
    the root's content; any further top-level keys are ignored.

    Raises ValueError when xmldict is empty.
    """
    if not xmldict:
        raise ValueError('Expected a dictionary containing a root element')
    # next(iter(...)) yields the first key on both Python 2 and Python 3,
    # whereas indexing keys() only works on Python 2.
    roottag = next(iter(xmldict))
    root = ElementTree.Element(roottag)
    _ConvertDictToXmlRecurse(root, xmldict[roottag])
    return root
def _ConvertXmlToDictRecurse(node, dictclass):
    """
    Convert the Element node into a dictclass instance, or into a plain
    string when the element has neither attributes nor children.

    Attributes are stored as ordinary keys. Each child is stored under its
    tag; when a tag occurs more than once the value becomes a list holding the
    entries in document order. Element text is stripped of surrounding
    whitespace and, for elements that have attributes or children, stored
    under '_text' only when non-empty.
    """
    nodedict = dictclass()

    # attributes first, so they share the dictionary with the child elements
    nodedict.update(node.items())

    for child in node:
        # recursively add the element's children
        newitem = _ConvertXmlToDictRecurse(child, dictclass)
        if child.tag not in nodedict:
            # only one so far, directly set the dictionary
            nodedict[child.tag] = newitem
            continue
        # found duplicate tag, force a list
        existing = nodedict[child.tag]
        if isinstance(existing, list):
            existing.append(newitem)
        else:
            nodedict[child.tag] = [existing, newitem]

    text = '' if node.text is None else node.text.strip()

    if not nodedict:
        # no child nodes or attributes: the element is represented by its text
        return text

    if text:
        nodedict['_text'] = text
    return nodedict
def ConvertXmlToDict(root, dictclass=XmlDictObject):
    """
    Converts an XML file or ElementTree Element to a dictionary

    root is either a file path string, which is parsed with ElementTree, or an
    ElementTree.Element. dictclass is the mapping type used for every level of
    the result. The returned dictclass instance has a single key, the root
    tag, mapped to the converted root element.

    Raises TypeError when root is neither a string nor an Element.
    """
    try:
        string_types = basestring  # Python 2: accept both str and unicode paths
    except NameError:
        string_types = str  # Python 3

    # If a string is passed in, try to open it as a file
    if isinstance(root, string_types):
        root = ElementTree.parse(root).getroot()
    elif not isinstance(root, ElementTree.Element):
        raise TypeError('Expected ElementTree.Element or file path string')

    return dictclass({root.tag: _ConvertXmlToDictRecurse(root, dictclass)})
# Run the calling example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
I wanted to keep as much data as possible when converting. To achieve this, I convert a dict item to a list if a duplicate tag exists. I also pack the text of an element into the dict under the key '_text' if attributes or child nodes exist. This algorithm may not be the best method for all situations, but it works well when loading XML config files and writing them out again.
Update: Added a dict wrapper object. Now the data can be treated like an object instead of a dictionary.