This recipe provides lightweight functions for pickling objects to a DOM structure and vice versa. I use it in connection with <a href="http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/286150">Hierarchical Objects</a> from recipe #286150.
<strong>This recipe makes use of eval(), so do not unpickle untrusted XML documents!</strong> I add some secret checksums to my documents, which I check before unpickling.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | import xml.dom.minidom as dom
class PickleMeToXML(object):
    """Marker base class for objects that are pickled attribute by attribute.

    The class defines no members of its own; pickle() only tests for it
    with isinstance().
    """
# helper function
def getType(obj):
    """Return the bare class name of obj, without any module prefix.

    The name is read from the class object itself instead of being parsed
    out of its string form, so it also works for classes whose repr is
    customised (and, on Python 2, for old-style classes, whose repr
    contains no quotes to split on).
    """
    return obj.__class__.__name__
_easyToPickle = [ "int", "float", "str" ]
_isCallable = lambda o: hasattr(o, "__call__")
#
# pickling
#
def _pickleDictItems(root, node, fabric):
    """Append one "item" element to node for every entry of the dict root.

    Each "item" element receives two children built by pickle(): first an
    element named "key", then an element named "value".  fabric is the DOM
    document used to create the new elements.
    """
    for entryKey, entryValue in root.items():
        itemNode = fabric.createElement("item")
        for tag, part in (("key", entryKey), ("value", entryValue)):
            itemNode.appendChild(pickle(part, fabric, tag))
        node.appendChild(itemNode)
def _pickleListItems(root, node, fabric):
    """Append one "item" element to node for every member of the sequence root.

    Each element is built by pickle() and tagged with an "index" attribute
    holding the member's position as a string.
    """
    for position, member in enumerate(root):
        itemNode = pickle(member, fabric, "item")
        itemNode.attributes["index"] = str(position)
        node.appendChild(itemNode)


# Tuples are written with the same routine as lists.
_pickleTupleItems = _pickleListItems
def pickle(root, fabric, elementName="root"):
    """Serialise root into a new DOM element and return that element.

    Parameters:
        root        -- the object to serialise
        fabric      -- DOM document used as node factory (createElement and
                       createTextNode are called on it)
        elementName -- tag name of the element that is created

    The element carries a "type" attribute holding the class name of root.
    Its content depends on the kind of object:
      * PickleMeToXML instances: filled by _pickleObjectWithAttributes()
      * types named in _easyToPickle: one text node with the value
      * dict, list, tuple: "item" child elements
      * anything else: one text node holding repr(root)
    """
    node = fabric.createElement(elementName)
    typeStr = getType(root)
    node.attributes["type"] = typeStr

    if isinstance(root, PickleMeToXML):
        node = _pickleObjectWithAttributes(node, root, fabric, elementName)
    elif typeStr in _easyToPickle:
        # repr() keeps every significant digit of a float, whereas str()
        # truncates floats to 12 digits on Python 2, so the value would
        # not survive a pickle/unpickle round trip.
        if isinstance(root, float):
            text = repr(root)
        else:
            text = str(root)
        node.appendChild(fabric.createTextNode(text))
    elif isinstance(root, dict):
        _pickleDictItems(root, node, fabric)
    elif isinstance(root, list):
        _pickleListItems(root, node, fabric)
    elif isinstance(root, tuple):
        _pickleTupleItems(root, node, fabric)
    else:
        # fallback handler: keep at least a textual representation
        node.appendChild(fabric.createTextNode(repr(root)))
    return node
def _pickleObjectWithAttributes(node, root, fabric, elementName):
    """Fill node with the attributes of the PickleMeToXML instance root.

    Which attributes are written:
      * if root has a __pickle_to_xml__ attribute, the names listed there
      * otherwise every name reported by dir(root) that does not start
        with "__"
    Callable attributes are always skipped.

    For an attribute "xxx" with a matching method _xml_encode_xxx, the
    method's return value is appended to node as a text node; every other
    attribute becomes a child element created by pickle().

    elementName is accepted but not used in this function.
    Returns node.
    """
    if not hasattr(root, "__pickle_to_xml__"):
        # no explicit selection: take everything except python internals
        names = [candidate for candidate in dir(root) if candidate[:2] != "__"]
    else:
        names = root.__pickle_to_xml__

    for attrName in names:
        attrValue = getattr(root, attrName)
        # member functions are never pickled
        if _isCallable(attrValue):
            continue
        encoderName = "_xml_encode_%s" % attrName
        if not hasattr(root, encoderName):
            node.appendChild(pickle(attrValue, fabric, attrName))
            continue
        # NOTE(review): the encoded value is appended as a bare text node,
        # not wrapped in an element named after the attribute -- confirm
        # that the reading side can associate it with the attribute again.
        encoded = getattr(root, encoderName)()
        node.appendChild(fabric.createTextNode(encoded))
    return node
#
# unpickling
#
# helper functions
def _getElementChilds(node, doLower = 1):
""" returns list of (tagname, element) for all element childs of node """
dolow = doLower and (lambda x:x.lower()) or (lambda x:x)
return [ (dolow(no.tagName), no) for no in node.childNodes if no.nodeType != no.TEXT_NODE ]
def _getText(nodelist):
""" returns collected and stripped text of textnodes among nodes in nodelist """
rc = ""
for node in nodelist:
if node.nodeType == node.TEXT_NODE:
rc = rc + node.data
return rc.strip()
# main unpickle function
def unpickle(node):
    """Rebuild the Python object described by the DOM element node.

    The "type" attribute of node selects how the content is read:
      * a name listed in _easyToPickle: the stripped text of node is
        passed to that type
      * "tuple", "list", "dict": handled by the matching helper
      * any other name: an instance of that class is created with
        object.__new__() (so __init__ is not run) and every child element
        is unpickled and stored as an attribute named after its tag

    Raises XMLUnpicklingException if the type name is not a plain
    identifier.

    SECURITY: class names are still resolved with eval().  Only unpickle
    documents that come from a trusted source.
    """
    typeName = node.attributes["type"].value

    if typeName in _easyToPickle:
        initValue = _getText(node.childNodes)
        # typeName is restricted to the entries of _easyToPickle and the
        # text is inserted as a repr() literal, so no text from the
        # document is evaluated as code here.
        return eval("%s(%r)" % (typeName, initValue))
    if typeName == "tuple":
        return _unpickleTuple(node)
    if typeName == "list":
        return _unpickleList(node)
    if typeName == "dict":
        return _unpickleDict(node)

    # The type name comes straight from the document and ends up inside
    # eval(): refuse anything that is not a single identifier so that no
    # arbitrary expression can be smuggled in.
    import re
    if not re.match(r"[^\W\d]\w*\Z", typeName, re.UNICODE):
        raise XMLUnpicklingException("invalid type name %r" % (typeName,))
    obj = eval("object.__new__(%s)" % typeName)

    # Attribute names are case-sensitive, so the tag names must be taken
    # exactly as written instead of lower-cased.
    for name, element in _getElementChilds(node, 0):
        setattr(obj, name, unpickle(element))
    return obj
class XMLUnpicklingException(Exception):
    """Raised when a DOM structure does not have the layout unpickling expects."""
def _unpickleList(node):
    """Rebuild a list from the "item" child elements of node.

    Every child element must be named "item" and carry an integer "index"
    attribute.  The children may appear in any order in the document; the
    result is ordered by index.

    Raises XMLUnpicklingException if a child element is not an "item".
    """
    indexed = []
    # collect entries, you can not assume that the
    # members of the list appear in the right order !
    for name, element in _getElementChilds(node):
        if name != "item":
            raise XMLUnpicklingException(
                "unexpected element %r inside a list, expected 'item'" % (name,))
        idx = int(element.attributes["index"].value)
        indexed.append((idx, unpickle(element)))
    # Rebuild the right order.  Sort on the index alone: comparing whole
    # (index, object) tuples would compare the unpickled objects whenever
    # two indices are equal, which can raise TypeError.
    indexed.sort(key=lambda pair: pair[0])
    return [obj for idx, obj in indexed]
def _unpickleTuple(node):
    """Rebuild a tuple from node; its children are read like those of a list."""
    members = _unpickleList(node)
    return tuple(members)
def _unpickleDict(node):
    """Rebuild a dict from the "item" child elements of node.

    Every "item" element must contain exactly two child elements, one
    named "key" and one named "value", in either order.

    Raises XMLUnpicklingException if a child of node is not an "item", if
    an item does not have exactly two child elements, or if it lacks
    either the "key" or the "value" element.
    """
    missing = object()  # sentinel: None is a legitimate key or value
    result = dict()
    for itemName, itemElement in _getElementChilds(node):
        if itemName != "item":
            raise XMLUnpicklingException(
                "unexpected element %r inside a dict, expected 'item'" % (itemName,))
        childList = _getElementChilds(itemElement)
        if len(childList) != 2:
            raise XMLUnpicklingException(
                "dict item must have exactly 2 child elements, found %d"
                % len(childList))
        # Start fresh for every item so that a malformed item can never
        # pick up the key or value of the item before it.
        key = value = missing
        for childName, childElement in childList:
            if childName == "key":
                key = unpickle(childElement)
            elif childName == "value":
                value = unpickle(childElement)
        if key is missing or value is missing:
            raise XMLUnpicklingException(
                "dict item needs one 'key' and one 'value' element")
        result[key] = value
    return result
if __name__ == "__main__":
    # build some nested data structures for testing purposes

    class RootObject(PickleMeToXML):
        # class attribute: found through dir() and pickled like the others
        counter = 3

        def __init__(self):
            self.sub = SubObject()
            self.data = dict(xyz=4711)
            self.objlist = [1, 2, SubObject(), DetailObject()]

    class SubObject(PickleMeToXML):
        # only the attributes named here are pickled, so "z" is left out
        __pickle_to_xml__ = ["values", "detail"]

        def __init__(self):
            self.values = (3.12, 4711, 8.15)
            self.z = "uwe"
            self.detail = DetailObject()

    class DetailObject(PickleMeToXML):
        statement = "1 < 2 is true"
        blablaliste = ["a", "b", "c"]

        def _xml_encode_statement(self):
            # encrypt attribute 'statement'
            return self.statement[::-1]

        def _xml_decode_statement(self, value):
            # decrypt value
            self.statement = value[::-1]

    # testing procedure:
    # convert objects -> xml -> objects -> xml
    obj = RootObject()
    node = pickle(root=obj, fabric=dom.Document())
    x = unpickle(node)
    node = pickle(root=x, fabric=dom.Document())
    # that is how the xml document looks like
    # (print with one parenthesised argument runs on Python 2 and 3 alike)
    print(node.toprettyxml())
|
I am currently writing an application that handles hierarchically structured data. I started writing .toDOM and .fromDOM methods for each object and discovered that I was duplicating lots of code, so this recipe arose. It shows how to implement the visitor pattern in Python. This recipe is more lightweight than the XML picklers I found.
You just have to derive your objects from "PickleMeToXML", pickle() will do the rest for you. This base class is just a marker and will not add any members to your class.
Instances of PickleMeToXML are checked for an attribute __pickle_to_xml__, which is a list of the attribute names to pickle. If there is no such list, all attributes which are ints, floats, strings, lists, tuples, dictionaries or instances of PickleMeToXML are pickled.
You can do some special pickling / unpickling of an attribute "xxx" by implementing methods _xml_encode_xxx and _xml_decode_xxx as shown in the example above.
Adding the unicode type to _easyToPickle: I had trouble unpickling unicode values, so I added "unicode" to the _easyToPickle list, and it seems to work.