Welcome, guest | Sign In | My Account | Store | Cart

I really like config files written in Python itself, using a dictionary declaration. It's cool for programmers, but not so cool for system administrators who are not used to Python (it's so easy to forget a comma...). To keep using dictionaries internally while providing something more admin-friendly, I've written some functions to convert an XML file to a Python dictionary (and the reverse as well):

Python, 232 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import sys
from cStringIO import StringIO
from xml.parsers import expat

def list_to_xml(name, l, stream):
   """ Write each dict found in the list `l` to `stream`, in list order.

       Every entry is handed to dict_to_xml with `name` as its node name,
       so the list ends up as a run of sibling <name> elements. """
   for entry in l:
      dict_to_xml(entry, root_node_name=name, stream=stream)

def dict_to_xml(d, root_node_name, stream):
   """ Transform a dict into a XML, writing to a stream.

       String values become attributes of the <root_node_name> element,
       dict values become child elements named after their key, and list
       values become a run of child elements that all share the key name.

       d              -- dict to serialise
       root_node_name -- tag name of the element written for `d`
       stream         -- object with a write() method that receives the XML

       Raises TypeError when a value is not a dict, a list or a string. """
   # Local import: keeps the new dependency inside this function.
   from xml.sax.saxutils import quoteattr

   try:
      string_types = basestring   # Python 2: covers str and unicode
   except NameError:
      string_types = str          # Python 3: str is the only text type

   stream.write('\n<' + root_node_name)
   # Attributes and children are buffered separately because all attributes
   # must be written before we know whether the tag is self-closing.
   attributes = StringIO()
   nodes = StringIO()
   for key, value in d.items():
      if isinstance(value, dict):
         dict_to_xml(value, key, nodes)
      elif isinstance(value, list):
         list_to_xml(key, value, nodes)
      elif isinstance(value, string_types):
         # quoteattr escapes &, <, > and picks a safe quote character, so a
         # value containing markup characters cannot break the document.
         attributes.write('\n  %s=%s ' % (key, quoteattr(value)))
      else:
         raise TypeError('sorry, we support only dicts, lists and strings')

   stream.write(attributes.getvalue())
   nodes_str = nodes.getvalue()
   if not nodes_str:
      # no children: emit a self-closing tag
      stream.write('/>')
   else:
      stream.write('>')
      stream.write(nodes_str)
      stream.write('\n</%s>' % root_node_name)

def dict_from_xml(xml):
   """ Load a dict from a XML string.

       The attributes of the root element become the string entries of the
       returned dict (the root element's own name is dropped). Every child
       element name maps to a list holding one dict per occurrence, built
       the same way recursively. Character data is ignored.

       xml -- the complete XML document as a string

       Raises expat.ExpatError when the document is malformed, truncated or
       empty, and ValueError when an element has an attribute and a child
       element with the same name (a dict cannot hold both). """

   def node_to_dict(node):
      """ Convert one [name, attributes, children] node to a dict """
      attrs, children = node[1], node[2]
      result = dict(attrs)
      for child in children:
         child_name = child[0]
         if child_name in attrs:
            # Without this check we would call append() on the attribute
            # string and fail with an obscure AttributeError.
            raise ValueError('element <%s> has both an attribute and a '
                             'child element named "%s"' % (node[0], child_name))
         # nodes with the same name are collected in one list, like
         # <list><item/><item/><item/></list>
         result.setdefault(child_name, []).append(node_to_dict(child))
      return result

   class Sink(object):
      """ This is our expat event sink: it builds a tree of
          [name, attributes, children] lists """
      def __init__(self):
         self.root = None
         # elements that are currently open, innermost last
         self.open_nodes = []

      def start_element(self, name, attrs):
         node = [name, attrs, []]
         if self.open_nodes:
            self.open_nodes[-1][2].append(node)
         else:
            # root node
            self.root = node
         self.open_nodes.append(node)

      def end_element(self, name):
         self.open_nodes.pop()

      def char_data(self, data):
         # We don't write char_data to file (beyond \n and spaces),
         # so it is deliberately dropped when reading.
         pass

   p = expat.ParserCreate()
   m = Sink()

   p.StartElementHandler = m.start_element
   p.EndElementHandler = m.end_element
   p.CharacterDataHandler = m.char_data

   # isfinal=True tells expat this is the whole document, so truncated or
   # empty input is reported as an ExpatError instead of passing silently.
   p.Parse(xml, True)

   return node_to_dict(m.root)

class ConfigHolder(object):
    """
    Wrapper around a nested config dict that can be queried with
    filesystem-like paths such as 'lists[0]/list_item[1]/address'.
    """

    # Sentinel that lets get_must_exist tell "key not found" apart from a
    # key whose stored value is None.
    _MISSING = object()

    def __init__(self, d=None):
        """
        Init from dict d (an empty dict is used when d is None)
        """
        self.d = {} if d is None else d

    def __str__(self):
        return str(self.d)

    __repr__ = __str__

    def load_from_xml(self, xml):
        """
        Replace the held dict with the one parsed from the XML string
        """
        self.d = dict_from_xml(xml)

    def load_from_dict(self, d):
        """
        Replace the held dict with d (no copy is made)
        """
        self.d = d

    def get_must_exist(self, key):
        """
        Like get(), but raise KeyError when the path cannot be resolved
        """
        v = self.get(key, self._MISSING)

        if v is self._MISSING:
            raise KeyError('the required config key "%s" was not found' % key)

        return v

    def __getitem__(self, key):
        """
        Support for config['path/key'] syntax
        """
        return self.get_must_exist(key)

    def get(self, key, default=None):
        """
        Get from config using a filesystem-like syntax

        value = 'start/sub/key' will
        return config_map['start']['sub']['key']

        An intermediate segment may carry a list index: 'sub[2]' means
        config_map['sub'][2]. The last segment is always used as a plain
        key. One trailing '/' is ignored.

        Returns default when the path is malformed or cannot be resolved.
        """
        try:
            path = key.split('/')
            # handle 'key/subkey[2]/value/'
            if path[-1] == '':
                path = path[:-1]

            node = self.d
            for segment in path[:-1]:
                bracket = segment.find('[')
                # find() returns -1 (which is truthy) when there is no '[',
                # so the result must be compared explicitly.
                if bracket == -1:
                    node = node[segment]
                    continue
                if not segment.endswith(']'):
                    raise ValueError('invalid syntax')
                index = int(segment[bracket + 1:-1])
                node = node[segment[:bracket]][index]

            return node[path[-1]]

        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            # best-effort lookup: any unresolved or malformed path yields
            # the default instead of an error
            return default



def DoTest():
    """
    Round-trip self-test: serialise a sample config dict to XML, parse it
    back, and check that the XML stays identical from one pass to the next.
    Also prints a few lookups done on the raw dict and through ConfigHolder.
    """
    config_dict = {
        'config_name': 'test',
        'source': {
            'address': 'address_value',
            'port': 'port_value',
        },
        'destination': {
            'address': 'address_value',
            'port': 'port_value',
            'routing_exceptions': {
                'test': {
                    'address': 'localhost',
                    'port': 'port_value',
                },
            },
        },
        'lists': {
            'list_item': [
                {'address': 'address_value', 'port': 'port-value'},
                {'address': 'address_value', 'port': 'port-value'},
            ],
        },
    }

    test_count = 3
    previous_xml = None

    for _ in range(test_count):
        s = StringIO()
        dict_to_xml(config_dict, 'config', s)

        xml = s.getvalue()
        print(xml)

        # From the second pass on, the dict being serialised is the one
        # that was loaded from the previous pass's XML.
        config_dict = dict_from_xml(xml)
        print(config_dict)

        if previous_xml is not None:
            assert xml == previous_xml

        previous_xml = xml

        # using XPATH like syntax
        print(config_dict['destination'][0]['port'])
        print(config_dict['destination'][0]['routing_exceptions'][0]['test'][0]['address'])
        print(config_dict['lists'][0]['list_item'][1]['address'])

        # ConfigHolder makes it even easier
        t = ConfigHolder()
        t.load_from_dict(config_dict)

        print(t['destination[0]/port'])
        print(t['lists[0]/list_item[1]/address'])


# Run the round-trip self-test only when this file is executed as a script.
if __name__ == '__main__':
    DoTest()

Note that this code supports only dictionaries containing strings (plus nested dictionaries and lists of dictionaries). To support other types I would have to add some type information to the XML, which would make the file more difficult to edit without remembering yet another XML syntax/schema. One good side effect of using dicts and lists is that we can use an XPath-like syntax to get config values from inside the dict - a very good side effect considering there's no XPath included in the official Python distribution. ConfigHolder is the bonus class. It makes the dict access syntax even better.

5 comments

brit tonf 16 years, 10 months ago  # | flag

glitches.

im using python 2.4
This may be bit premature since i haven't overly studied yet:

on lines 57 and 103, there was some syntax errors which i just commented out the latter half of those lines, and then i got:

AttributeError: 'unicode' object has no attribute 'append'
on line 55

I'll continue tweaking around and see what i come up with

This is a great idea, looks super-useful, and i look forward to using it.
Simon Hibbs 16 years, 10 months ago  # | flag

XML config files - Blech! What a great recipe! I can think of lots of handy uses for this, but I have to say config files aren't one of them. Keeping your commas straight is nothing compared to manually editing an XML file and keeping it well-formed by eye. YAML is the way to go for config notation, but even so this is an excellent idea. Many thanks.

Thunder Chen 16 years, 8 months ago  # | flag

I have written a similar script based on ElementTree from Python 2.5.

With it you can convert an XML file to an object dict and use it in an OO way, like root.node.data. It does not support writing back to an XML file.

http://code.google.com/p/xml2dict/
Kevin Kreiser 11 years, 11 months ago  # | flag

I've found this implementation very helpful for creating dicts out of XML. I have made a few changes, though, that add a little bit of functionality. First, I've changed it so that if you have text between two tags, such as <item>someValue</item>, the resulting dict will contain this text as an attribute: {"item": {"v":"someValue"}}. The second change was to remove the extra lists generated in the final result (list_to_dict function). Previously a final result might look something like: {'node': [{'subNode': [{'attrib1': 'x', 'attrib2': 'y'}]}]}. With my changes the extra lists are removed: {'node': {'subNode': {'attrib1': 'x', 'attrib2': 'y'}}}, but constructions in which multiple nodes have the same name are still returned as lists, such as {'item': [{'x': '1'}, {'x': '2'}]}. The bits that I changed are in the following comment (stupid char limit on comments..)

Kevin Kreiser 11 years, 11 months ago  # | flag

Code with changes from comment above:

from xml.parsers import expat

def dict_from_xml(xml):
    """ Load a dict from a XML string.

    The attributes of the root element become the string entries of the
    returned dict (the root element's own name is dropped). A child element
    that occurs once maps to a single dict; a child name that occurs
    several times maps to a list of dicts.

    Text inside an element is stored on that element under the first free
    key among 'v', 'va', 'val', 'valu', 'value'. Whitespace-only text is
    ignored; any other text is stored verbatim. Text is dropped when all
    five keys are already taken.

    xml -- the complete XML document as a string

    Raises expat.ExpatError when the document is malformed, truncated or
    empty, and ValueError when a child element has the same name as an
    attribute (or text key) of its parent. """
    # Candidate keys for element text, tried in this order.
    text_keys = ('v', 'va', 'val', 'valu', 'value')

    def node_to_dict(node):
        """ Convert one [name, attributes, children] node to a dict """
        attrs, children = node[1], node[2]
        result = dict(attrs)
        for child in children:
            child_name = child[0]
            value = node_to_dict(child)
            if child_name not in result:
                # we have never seen this key before so just keep the value
                result[child_name] = value
            elif child_name in attrs:
                # Without this check we would call append() on the
                # attribute string and fail with an AttributeError.
                raise ValueError('element <%s> has both an attribute and a '
                                 'child element named "%s"'
                                 % (node[0], child_name))
            elif isinstance(result[child_name], list):
                # we already have a list so keep the next item in it
                result[child_name].append(value)
            else:
                # we only saw this key once before so start a list of them
                result[child_name] = [result[child_name], value]
        return result

    class Sink(object):
        """ This is our expat event sink: it builds a tree of
            [name, attributes, children] lists """
        def __init__(self):
            self.root = None
            # elements that are currently open, innermost last
            self.open_nodes = []
            # text chunks seen since the last start/end tag
            self.pending_text = []

        def flush_text(self):
            """ Attach the buffered text to the innermost open element """
            text = ''.join(self.pending_text)
            del self.pending_text[:]
            if not self.open_nodes or not text.strip():
                return
            attrs = self.open_nodes[-1][1]
            for key in text_keys:
                if key not in attrs:
                    # insert it as another attribute
                    attrs[key] = text
                    break

        def start_element(self, name, attrs):
            self.flush_text()
            node = [name, attrs, []]
            if self.open_nodes:
                self.open_nodes[-1][2].append(node)
            else:
                # root node
                self.root = node
            self.open_nodes.append(node)

        def end_element(self, name):
            self.flush_text()
            self.open_nodes.pop()

        def char_data(self, data):
            # expat may deliver one run of text in several calls, so the
            # chunks are buffered and joined at the next tag boundary.
            self.pending_text.append(data)

    p = expat.ParserCreate()
    m = Sink()

    p.StartElementHandler = m.start_element
    p.EndElementHandler = m.end_element
    p.CharacterDataHandler = m.char_data

    # isfinal=True tells expat this is the whole document, so truncated or
    # empty input is reported as an ExpatError instead of passing silently.
    p.Parse(xml, True)

    return node_to_dict(m.root)