Takes an arbitrary XML file and generates the ElementTree (ET) code that reproduces that XML file. This is useful if you have an example XML file, or any arbitrary XML file, that you'd like to use as a template in which certain elements are parameterized. Upgrades coming.
import xml.etree.ElementTree as ET
import re
class ETGen(object):
    """Generate Python ElementTree code that rebuilds a given XML document.

    Instantiating the class does all the work: the input XML is parsed, the
    element tree is walked depth-first, and a Python module is written to
    ``out``.  That module defines ``build(**kwargs)``, which recreates the
    tree with ``ET.Element`` / ``ET.SubElement`` calls and returns
    ``ET.tostring(root)``.  Every text node and attribute value is emitted as
    ``kwargs.get('', <original value>)`` and marked ``# PARAMETERIZE`` so the
    keyword name can be filled in by hand afterwards.

    Only element text and attributes are reproduced; ``tail`` text is not
    read by this generator.
    """

    # Suffix inserted between the tag's local name and the running counter
    # when building variable names for the generated code.
    TAGSUFFIX = 'XMLTag'

    # Splits an ElementTree name "{namespace}local" into (namespace, local);
    # the namespace group is None for unqualified names.
    _TAG_RE = re.compile(r'(?:\{(.*?)\})?(.*)')

    def __init__(self, xmlin, out, param=None):
        """Parse ``xmlin`` and write the generator module to ``out``.

        xmlin -- path of the XML file to use as the template
        out   -- path of the Python file to create (overwritten if present)
        param -- accepted for backward compatibility; currently unused
        """
        self.counter = 0      # number of variable names handed out so far
        self.constants = {}   # namespace URI -> generated constant name
        self.lines = []       # body lines of the generated build() function
        self.out = out
        # ET.parse is available on every ElementTree version and lets the
        # parser honour the encoding declared in the document itself.
        tree = ET.parse(xmlin).getroot()
        self.__walk(tree, None)
        self.__write()

    def __genName(self, name):
        """Return a unique, valid Python variable name for the tag ``name``."""
        self.counter += 1
        local = ETGen._TAG_RE.match(name).group(2)
        # XML names may contain '-', '.' or ':' which are not legal in Python
        # identifiers; the counter suffix keeps the result unique anyway.
        local = re.sub(r'\W', '_', local)
        return local + ETGen.TAGSUFFIX + str(self.counter)

    def __write(self):
        """Write the collected constants and build() body to ``self.out``."""
        # Emit constants in the order they were allocated (NS0, NS1, ...).
        constants = sorted(self.constants.items(),
                           key=lambda item: int(item[1][2:]))
        with open(self.out, 'w') as h:
            h.write("import xml.etree.ElementTree as ET\n\n")
            # prints namespace constants; %r keeps URIs with quotes valid
            h.writelines(["%s = %r\n" % (v, k) for k, v in constants])
            h.write("\n")
            h.write("def build(**kwargs):\n\t")
            h.write("\n\t".join(self.lines))
            h.write("\n\treturn ET.tostring(%s)\n\n" % self.root)
            # print(...) with a single argument works on Python 2 and 3
            h.write("if __name__ == '__main__': print(build())")

    def __getNamespace(self, name):
        """Return Python source for the tag or attribute name ``name``.

        Unqualified names become a plain string literal.  Qualified names
        become ``'{%s}local' % NSn`` where ``NSn`` is a module-level constant
        holding the namespace URI; a new constant is allocated the first time
        a URI is seen.
        """
        ns, local = ETGen._TAG_RE.match(name).groups()
        if ns is None:
            return repr(name)
        if ns not in self.constants:
            self.constants[ns] = "NS" + str(len(self.constants))
        nsName = self.constants[ns]
        # Double any '%' in the local part so it survives the '%' formatting
        # performed by the generated code.
        template = '{%s}' + local.replace('%', '%%')
        return "%r %% %s" % (template, nsName)

    def __walk(self, node, parent):
        """Emit code for ``node`` and, recursively, for all its children.

        node   -- the element to emit
        parent -- variable name of the already-emitted parent element, or
                  None when ``node`` is the document root
        """
        name = self.__genName(node.tag)
        tag = self.__getNamespace(node.tag)
        if parent is None:
            self.root = name
            self.lines.append("%s = ET.Element(%s)" % (name, tag))
        else:
            self.lines.append("%s = ET.SubElement(%s, %s)" % (name, parent, tag))
        # handles text: whitespace-only or missing text emits nothing
        t = (node.text or '').strip()
        if t:
            # %r produces a correctly escaped literal even when the text
            # contains quotes, backslashes or newlines
            self.lines.append("%s.text = kwargs.get('', %r) # PARAMETERIZE" % (name, t))
        # handles attributes
        for key, val in node.items():
            key = self.__getNamespace(key)
            self.lines.append("%s.set(%s, kwargs.get('', %r)) # PARAMETERIZE" % (name, key, val))
        # Iterating the element yields its children; getchildren() has been
        # removed from current ElementTree versions.
        for child in node:
            self.__walk(child, name)
def main(xmlin='/home/user/manifest.xml', out='/home/user/manifest_generator.py'):
    """Generate the ElementTree builder module for one XML file.

    xmlin -- path of the XML file used as the template
    out   -- path of the Python generator file to write

    Both default to the original example paths, so calling ``main()`` with no
    arguments behaves as before.
    """
    # Constructing ETGen parses the input and writes the output file.
    ETGen(xmlin, out)


if __name__ == '__main__':
    import sys
    # Optional command-line use: up to two arguments override the input and
    # output paths; with no arguments the defaults above are used.
    main(*sys.argv[1:3])
|
The primary need for this recipe came from my not wanting to read the XML specification for an IMS manifest, while having an example manifest to work with. After writing this, I just fed the manifest in and parameterized the values I needed. Simple :)
Once the ET generator file is created, import it into your main program. From there, call the "build()" function with keyword arguments to fill in the blanks in the template.