Welcome, guest | Sign In | My Account | Store | Cart

Takes an arbitrary XML file and generates the ElementTree (ET) code that builds that XML file. This is useful if you have an example XML file, or any arbitrary XML file, that you'd like to use as a template in which certain elements are parameterized. Upgrades coming.

Python, 86 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import xml.etree.ElementTree as ET
import re

class ETGen(object):
    """Generate a Python module that rebuilds a given XML file with ElementTree.

    The input XML file is parsed and walked depth-first.  For every element a
    line of ``xml.etree.ElementTree`` code is emitted, and the collected lines
    are written to the output file as the body of a ``build(**kwargs)``
    function.  Element text and attribute values are emitted as
    ``kwargs.get('', <original value>)`` and marked with ``# PARAMETERIZE`` so
    the key can be filled in by hand afterwards.  Namespace URIs are hoisted
    into module-level constants (``NS0``, ``NS1``, ...).

    Constructing an instance does all the work: it reads ``xmlin`` and writes
    ``out``.
    """

    TAGSUFFIX = 'XMLTag'

    def __init__(self, xmlin, out, param=None):
        """Parse ``xmlin`` and write the generator module to ``out``.

        :param xmlin: path of the XML file to use as a template.
        :param out: path of the Python file to write.
        :param param: unused; accepted only for backward compatibility.
        """
        self.counter = 0
        self.constants = {}
        self.lines = []
        self.out = out

        # ET.parse opens and closes the file itself and honors the encoding
        # declared in the XML prolog.
        tree = ET.parse(xmlin).getroot()

        self.__walk(tree, None)
        self.__write()

    def __genName(self, name):
        """Return a unique Python variable name derived from an element tag."""
        self.counter += 1
        local = re.search(r'(?:\{.*?\})?(.*)', name).group(1)
        # XML names may contain '-', '.' and similar characters that are not
        # legal in Python identifiers.
        local = re.sub(r'\W', '_', local)
        return local + ETGen.TAGSUFFIX + str(self.counter)

    def __write(self):
        """Write the collected lines to ``self.out`` as a ``build`` function."""
        with open(self.out, 'w') as h:
            h.write("import xml.etree.ElementTree as ET\n\n")

            # prints namespace constants; %r keeps the literal valid even if
            # the URI contains quotes or backslashes
            h.writelines("%s = %r\n" % (v, k) for k, v in self.constants.items())
            h.write("\n")

            h.write("def build(**kwargs):\n\t")

            h.write("\n\t".join(self.lines))
            h.write("\n\treturn ET.tostring(%s)\n\n" % self.root)

            # print(...) with a single argument is valid on Python 2 and 3
            h.write("if __name__ == '__main__': print(build())")

    def __getNamespace(self, name):
        """Return Python source for the tag or attribute name ``name``.

        Names without a namespace become a plain string literal.  Names in
        ElementTree's ``{uri}local`` form become ``'{%s}local' % NSn``, where
        ``NSn`` is the constant registered for ``uri`` in ``self.constants``.
        """
        match = re.match(r'\{(.*?)\}(.*)$', name)
        if match is None:
            return repr(name)
        ns, local = match.groups()
        if ns not in self.constants:
            nsName = "NS" + str(len(self.constants))
            self.constants[ns] = nsName
        else:
            nsName = self.constants[ns]
        return "%r %% %s" % ('{%s}' + local, nsName)

    def __walk(self, node, parent):
        """Emit code for ``node`` and, recursively, for all of its children.

        :param node: the ElementTree element to translate.
        :param parent: variable name of the already emitted parent element,
            or ``None`` when ``node`` is the document root.
        """
        name = self.__genName(node.tag)
        tag = self.__getNamespace(node.tag)

        if parent is None:
            self.root = name
            self.lines.append("%s = ET.Element(%s)" % (name, tag))
        else:
            self.lines.append("%s = ET.SubElement(%s, %s)" % (name, parent, tag))

        # handles text (for the root element too); whitespace-only text is
        # treated as absent
        t = (node.text or '').strip()
        if t:
            self.lines.append("%s.text = kwargs.get('', %r) # PARAMETERIZE" % (name, t))

        # handles attributes (for the root element too)
        for key, val in node.items():
            key = self.__getNamespace(key)
            self.lines.append("%s.set(%s, kwargs.get('', %r)) # PARAMETERIZE" % (name, key, val))

        # Iterating the element yields its children; getchildren() was
        # removed in Python 3.9.
        for child in node:
            self.__walk(child, name)

def main(xmlin='/home/user/manifest.xml', out='/home/user/manifest_generator.py'):
    """Generate the ElementTree builder module for an XML template.

    :param xmlin: path of the XML file to read.
    :param out: path of the Python file to write.

    The defaults are the example paths the script was originally hard-coded
    with, so calling ``main()`` without arguments behaves as before.
    """
    # Constructing ETGen performs the whole conversion; the instance itself
    # is not needed afterwards.
    ETGen(xmlin, out)
    
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()

The primary need for this recipe came from my not wanting to read the XML specification for an IMS manifest while already having an example manifest to work with. After writing this, I just fed the manifest in and parameterized the values I needed. Simple :)

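Once the ET generator file is created, replace the empty `''` keys on the lines marked `# PARAMETERIZE` with the keyword names you want, then import the file into your main program. From there, call the `build()` function with named keyword arguments to fill in the blanks in the template.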