Welcome, guest | Sign In | My Account | Store | Cart

Common python services such as pickle, deepcopy and comparison tests either fail entirely or do not scale for highly recursive data structures. This recipe presents a reversible "flatten" transformation that allows for such operations.

Python, 157 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python

try:
  set
except NameError:
  from sets import Set as set

class TankId(int):
    def __new__(self, i):
        return int.__new__( TankId, i )

class Tank:
    """
    Temporarily change references to TankId's so as to avoid nesting/recursion.
    Host the client instance and keep track of all reference changes.
    """
    def __init__(self, root = None):
        " root : client instance to flatten "

        self.tankid = TankId(0) # next TankId to use

        # map an object's id to a TankId (temporary)
        self.items = {}

        # map a TankId to an object
        self.ids = {}

        # we are done flattening when this list is empty again
        self.todo = []

        if root is not None:
            self.flatten( root )

    def __cmp__(self, other):
        if not isinstance( other, Tank ):
            return cmp( self, other )
        # all our state is in ids, which is "flat"
        return cmp( self.ids, other.ids )

    ###########################################
    # before pickle

    def flat_list(self, item):
        for i in range(len(item)):
            item[i] = self.queue( item[i] )

    def flat_set(self, item):
        newitem = set()
        for elem in item:
            newitem.add( self.queue( elem ) )
        item.clear()
        item.update(newitem)

    def flat_dict(self, item):
        for key in item.keys():
            item[key] = self.queue( item[key] )

    def flat_attrs(self, item):
        for key in item.__dict__.keys():
            item.__dict__[key] = self.queue( item.__dict__[key] )

    def queue(self, item):
        assert not isinstance( item, TankId )
        if not ( isinstance(item, list) or isinstance(item, set) or isinstance(item, dict) or hasattr(item, "__dict__") ):
            return item
        if id(item) not in self.items:
            self.todo.append(item)
            tankid = self.tankid
            self.items[ id(item) ] = tankid
            self.ids[ tankid ] = item
            self.tankid = TankId(tankid + 1)
        else:
            tankid = self.items[ id(item) ]
        return tankid

    def flatten(self, root):
        " flatten the root object and all it's references "

        self.root = root
        self.queue( self.root )
        while self.todo:
            item = self.todo.pop(0)
            if isinstance(item, list):
                self.flat_list(item)
            if isinstance(item, set):
                self.flat_set(item)
            if isinstance(item, dict):
                self.flat_dict(item)
            if hasattr(item, "__dict__"):
                self.flat_attrs(item)
            # tuple ? other immutables ? __set/getstate__ ?

        # flush items (do not pickle them)
        self.items = {}

    ###########################################
    # after pickle

    def lift_list(self, item):
        for i in range(len(item)):
            item[i] = self.recover( item[i] )

    def lift_set(self, item):
        newitem = set()
        for elem in item:
            newitem.add( self.recover( elem ) )
        item.clear()
        item.update(newitem)

    def lift_dict(self, item):
        for key in item.keys():
            item[key] = self.recover( item[key] )

    def lift_attrs(self, item):
        for key in item.__dict__.keys():
            item.__dict__[key] = self.recover( item.__dict__[key] )

    def recover(self, item):
        if isinstance( item, TankId ):
            item = self.ids[ item ]
            assert not isinstance( item, TankId )
        return item

    def lift(self):
        " restore the root object to it's un-flattened state "

        for item in self.ids.values():
            if isinstance(item, list):
                self.lift_list(item)
            if isinstance(item, set):
                self.lift_set(item)
            if isinstance(item, dict):
                self.lift_dict(item)
            if hasattr(item, "__dict__"):
                self.lift_attrs(item)
            # tuple ? other immutables ? __set/getstate__ ?

        return self.root


###################################
# for testing

def recursive( N ):
    recursive = [ [] for i in range(N) ]
    for i in range(N):
        for j in range(N):
            recursive[i].append( recursive[j] )
    return recursive

def nested( N ):
    root = []
    nest = root
    for i in range(N):
        nest.append( [] )
        nest = nest[0]
    return root

Both deepcopy and pickle use the python stack to recursively search object stores. This means they do not scale. The comparison operators handle deeply nested structures (before python2.4) but they fail entirely on some (even small) recursive data.

>>> from copy import deepcopy
>>> a = nested(4096) # big and nasty
>>> b = deepcopy(a)
RuntimeError: maximum recursion depth exceeded



>>> b = nested(4096) # ok make a copy ourselves...
>>> a==b # should be true
Traceback (most recent call last):
  File "", line 1, in ?
RuntimeError: maximum recursion depth exceeded in cmp



>>> a = recursive(2) # something else now, much smaller
>>> a
[[,
  [,
   ]],
 [[,
   ],
  ]]



>>> b = deepcopy(a) # works this time



>>> a==b # fails for even this small example
Traceback (most recent call last):
  File "", line 1, in ?
RuntimeError: maximum recursion depth exceeded in cmp

We could try using sys.setrecursionlimit, but that is limited by the platform stack size.

Now we use the Tank to "flatten" a:

>>> a = nested(4096)
>>> a_tank = Tank( a )
>>> b_tank = deepcopy( a_tank ) # copy works now
>>> a_tank == b_tank # cmp works
True
>>>

Can pickle / unpickle a_tank here.

>>> a = a_tank.lift() # restore object to it's original state
>>>

A more complete implementation might handle replacing tuples in-place with their flat versions (this is difficult to do in a scalable way), and perhaps use the __getstate__/__setstate__ protocol.

1 comment

Radek Szklarczyk 17 years, 11 months ago  # | flag

No set support. This library is not able to flatten sets. Easy fix should help!

Created by Simon Burton on Sun, 29 Aug 2004 (PSF)
Python recipes (4591)
Simon Burton's recipes (3)

Required Modules

Other Information and Tasks