import re import xml.sax.handler def xml2obj(src): """ A simple function to converts XML data into native Python object. """ non_id_char = re.compile('[^_0-9a-zA-Z]') def _name_mangle(name): return non_id_char.sub('_', name) class DataNode(object): def __init__(self): self._attrs = {} # XML attributes and child elements self.data = None # child text data def __len__(self): # treat single element as a list of 1 return 1 def __getitem__(self, key): if isinstance(key, basestring): return self._attrs.get(key,None) else: return [self][key] def __contains__(self, name): return self._attrs.has_key(name) def __nonzero__(self): return bool(self._attrs or self.data) def __getattr__(self, name): if name.startswith('__'): # need to do this for Python special methods??? raise AttributeError(name) return self._attrs.get(name,None) def _add_xml_attr(self, name, value): if name in self._attrs: # multiple attribute of the same name are represented by a list children = self._attrs[name] if not isinstance(children, list): children = [children] self._attrs[name] = children children.append(value) else: self._attrs[name] = value def __str__(self): return self.data or '' def __repr__(self): items = sorted(self._attrs.items()) if self.data: items.append(('data', self.data)) return u'{%s}' % ', '.join([u'%s:%s' % (k,repr(v)) for k,v in items]) class TreeBuilder(xml.sax.handler.ContentHandler): def __init__(self): self.stack = [] self.root = DataNode() self.current = self.root self.text_parts = [] def startElement(self, name, attrs): self.stack.append((self.current, self.text_parts)) self.current = DataNode() self.text_parts = [] # xml attributes --> python attributes for k, v in attrs.items(): self.current._add_xml_attr(_name_mangle(k), v) def endElement(self, name): text = ''.join(self.text_parts).strip() if text: self.current.data = text if self.current._attrs: obj = self.current else: # a text only node is simply represented by the string obj = text or '' self.current, self.text_parts = self.stack.pop() self.current._add_xml_attr(_name_mangle(name), obj) def characters(self, content): self.text_parts.append(content) builder = TreeBuilder() if isinstance(src,basestring): xml.sax.parseString(src, builder) else: xml.sax.parse(src, builder) return builder.root._attrs.values()[0]