Welcome, guest | Sign In | My Account | Store | Cart

This recipe provides lightweight functions for pickling objects to a DOM structure and vice versa. I use it in connection with <a href="http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/286150">Hierarchical Objects</a> from recipe #286150.

<strong>This recipe makes use of eval(), so do not unpickle untrusted XML documents!</strong> I add some secret checksums to my documents, which I check before unpickling.

Python, 200 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import xml.dom.minidom as dom

class PickleMeToXML(object):
    """Marker base class.

    pickle() tests objects with isinstance() against this class and writes
    matching instances attribute by attribute.  The class itself defines
    no members.
    """

# helper function

def getType(obj):
    """ Return the bare class name of obj, without module or package prefix.

        The name is read from obj.__class__.__name__ instead of being cut
        out of str(obj.__class__), so the result does not depend on how the
        class prints itself (a class whose string form contains no quotes
        used to raise IndexError here).
    """
    # keep only the part after the last dot, in case the name carries a path
    return obj.__class__.__name__.split(".")[-1]

_easyToPickle = [ "int", "float", "str" ]

_isCallable = lambda o: hasattr(o, "__call__")

# 
#   pickling 
# 

def _pickleDictItems(root, node, fabric):
    """ appends one "item" element per entry of the mapping root to node;
        every item holds a "key" child and a "value" child, both built
        by pickle() """
    for k, v in root.items():
        entry = fabric.createElement("item")
        for tag, part in (("key", k), ("value", v)):
            entry.appendChild(pickle(part, fabric, tag))
        node.appendChild(entry)

def _pickleListItems(root, node, fabric):
    """ appends one "item" element per member of the sequence root to node;
        the position of the member is stored in the "index" attribute """
    for position, member in enumerate(root):
        child = pickle(member, fabric, "item")
        child.attributes["index"] = str(position)
        node.appendChild(child)

# tuples are written exactly like lists
_pickleTupleItems = _pickleListItems

def pickle(root, fabric, elementName="root"):
    """ builds and returns a DOM element describing root.

        fabric is the object used to create nodes (createElement and
        createTextNode are called on it), elementName becomes the tag
        name.  The element carries a "type" attribute holding
        getType(root).
    """
    node = fabric.createElement(elementName)
    typeStr = getType(root)
    node.attributes["type"] = typeStr

    # instances of the marker class are written attribute by attribute
    if isinstance(root, PickleMeToXML):
        return _pickleObjectWithAttributes(node, root, fabric, elementName)

    # simple values: their str() form becomes the element text
    if typeStr in _easyToPickle:
        node.appendChild(fabric.createTextNode(str(root)))
        return node

    # containers: hand over to the matching item writer
    for containerType, writeItems in (
        (dict, _pickleDictItems),
        (list, _pickleListItems),
        (tuple, _pickleTupleItems),
    ):
        if isinstance(root, containerType):
            writeItems(root, node, fabric)
            return node

    # fallback handler: everything else is stored as its repr()
    node.appendChild(fabric.createTextNode(repr(root)))
    return node

def _pickleObjectWithAttributes(node, root, fabric, elementName):
    """ fills node with the non-callable attributes of root and returns node.

        If root has a __pickle_to_xml__ attribute it is taken as the list
        of attribute names to write; otherwise every name from dir(root)
        that does not start with a double underscore is used.
        elementName is accepted but not used here.
    """
    # explicit subset requested, or everything that is not python internal ?
    if hasattr(root, "__pickle_to_xml__"):
        names = root.__pickle_to_xml__
    else:
        names = [ n for n in dir(root) if not n.startswith("__") ]

    for name in names:
        member = getattr(root, name)

        # member functions are never written
        if _isCallable(member):
            continue

        encoderName = "_xml_encode_%s" % name
        if not hasattr(root, encoderName):
            # ordinary attribute: becomes a child element named after it
            node.appendChild(pickle(member, fabric, name))
            continue

        # a special encoding method exists: its return value is added to
        # node as a plain text node (not wrapped in an element)
        encoded = getattr(root, encoderName)()
        node.appendChild(fabric.createTextNode(encoded))
    return node

#
#   unpickling 
#

# helper functions

def _getElementChilds(node, doLower = 1):
    """ returns list of (tagname, element) for all element childs of node """

    dolow = doLower and (lambda x:x.lower()) or (lambda x:x)
    return [ (dolow(no.tagName), no) for no in node.childNodes if no.nodeType != no.TEXT_NODE ]

def _getText(nodelist):
    """ returns collected and stripped text of textnodes among nodes in nodelist """
    rc = ""
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc = rc + node.data
    return rc.strip()

# main unpickle function

def unpickle(node):
    """ rebuilds the python object described by the DOM element node.

        The "type" attribute of node selects how the content is read:
        names listed in _easyToPickle are built from the element text,
        "tuple", "list" and "dict" are handed to their own readers, and
        any other name is treated as a class name: an instance is created
        without calling __init__ and every child element is set as an
        attribute named after its tag.

        SECURITY: the type name and the element text are passed to eval().
        Never call this on documents from an untrusted source.
    """

    typeName = node.attributes["type"].value

    if typeName in _easyToPickle:
        initValue = _getText(node.childNodes)
        # typeName is one of the names in _easyToPickle here; the text is
        # inserted as a quoted literal via %r
        value = eval("%s(%r)" % (typeName, initValue))
        return value
    elif typeName == "tuple":
        return _unpickleTuple(node)
    elif typeName == "list":
        return _unpickleList(node)
    elif typeName == "dict":
        return _unpickleDict(node)
    else:
        # SECURITY: typeName comes straight from the document and is
        # evaluated as python code
        obj = eval("object.__new__(%s)" % typeName)
        # attribute names are case sensitive, so the tag names must not be
        # lower-cased here, otherwise "myValue" would come back as "myvalue"
        for name, element in _getElementChilds(node, 0):
            setattr(obj, name, unpickle(element))
        return obj
        
class XMLUnpicklingException(Exception):
    """ raised by the list and dict readers when an element does not have
        the expected structure """

def _unpickleList(node):
    """ rebuilds a list from the "item" childs of node.

        Every item must carry an "index" attribute holding its position.
        Raises XMLUnpicklingException if a child element is not an "item".
    """
    indexed = []
    # collect entries, you can not assume that the
    # members of the list appear in the right order !
    for name, element in _getElementChilds(node):
        if name != "item":
            raise XMLUnpicklingException()
        idx = int(element.attributes["index"].value)
        obj = unpickle(element)
        indexed.append((idx, obj))

    # rebuild list with right order; sort on the index only, so that the
    # members themselves are never compared (they may not be orderable)
    indexed.sort(key=lambda pair: pair[0])
    return [ pair[1] for pair in indexed ]

def _unpickleTuple(node):
    """ reads node like a list element and returns the members as tuple """
    members = _unpickleList(node)
    return tuple(members)

def _unpickleDict(node):
    """ rebuilds a dict from the "item" childs of node.

        Every item must hold exactly two child elements, one "key" and one
        "value".  Raises XMLUnpicklingException for any other structure.
    """
    result = dict()
    for name, element in _getElementChilds(node):
        if name != "item":
            raise XMLUnpicklingException()
        childList = _getElementChilds(element)
        if len(childList) != 2:
            raise XMLUnpicklingException()

        # collect the parts per item, so that nothing is carried over from
        # the previous item and a missing part is detected
        parts = {}
        for childName, childElement in childList:
            if childName in ("key", "value"):
                parts[childName] = unpickle(childElement)
        if len(parts) != 2:
            # two childs, but not exactly one "key" and one "value"
            raise XMLUnpicklingException()
        result[parts["key"]] = parts["value"]
    return result

if __name__=="__main__":

    # build some nested data structures for testing purposes

    class RootObject(PickleMeToXML):
        # no __pickle_to_xml__ here, so every non-callable attribute whose
        # name does not start with "__" is written, class attributes included

        counter = 3
        def __init__(self):
            self.sub = SubObject()
            self.data = dict(xyz=4711)
            self.objlist = [ 1, 2, SubObject(), DetailObject() ]

    class SubObject(PickleMeToXML):

        # only the attributes named here are written; "z" is not listed
        __pickle_to_xml__ = ["values", "detail"]

        def __init__(self):
            self.values = (3.12, 4711, 8.15)
            self.z = "uwe"
            self.detail = DetailObject()

    class DetailObject(PickleMeToXML):

        statement = "1 < 2 is true"
        blablaliste = ["a", "b", "c"]

        def _xml_encode_statement(self):
            # encrypt attribute 'statement'
            return self.statement[::-1]

        def _xml_decode_statement(self, value):
            # decrypt value
            self.statement = value[::-1]

    # testing procedure:
    # convert objects -> xml -> objects -> xml

    obj = RootObject()
    node = pickle(root=obj, fabric=dom.Document())
    x = unpickle(node)
    node = pickle(root=x, fabric=dom.Document())

    # that is how the xml document looks like
    # (print with parentheses runs under python 2 as well as python 3)
    print(node.toprettyxml())

I am currently writing an application that handles hierarchically structured data. I started writing .toDOM and .fromDOM methods for each object and discovered that I was duplicating a lot of code, so this recipe arose. It shows how to implement the visitor pattern in Python. This recipe is more lightweight than the XML picklers I found.

You just have to derive your objects from "PickleMeToXML", pickle() will do the rest for you. This base class is just a marker and will not add any members to your class.

Instances of PickleMeToXML are checked for an attribute __pickle_to_xml__, which is a list of attribute names to pickle. If there is no such list, all attributes which are ints, floats, strings, lists, tuples, dictionaries or instances of PickleMeToXML are pickled.

You can do some special pickling / unpickling of an attribute "xxx" by implementing methods _xml_encode_xxx and _xml_decode_xxx as shown in the example above.

1 comment

Fredrik Corneliusson 18 years, 2 months ago  # | flag

Adding unicode type to _easyToPickle. I had trouble with unpickle unicode type so I added it to the _easyToPickle list and it seems to work.

_easyToPickle = [ "int", "float", "str","unicode" ]
Created by Uwe Schmitt on Fri, 3 Dec 2004 (PSF)
Python recipes (4591)
Uwe Schmitt's recipes (4)

Required Modules

Other Information and Tasks