Common python services such as pickle, deepcopy and comparison tests either fail entirely or do not scale for highly recursive data structures. This recipe presents a reversible "flatten" transformation that allows for such operations.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | #!/usr/bin/env python
try:
set
except NameError:
from sets import Set as set
class TankId(int):
def __new__(self, i):
return int.__new__( TankId, i )
class Tank:
"""
Temporarily change references to TankId's so as to avoid nesting/recursion.
Host the client instance and keep track of all reference changes.
"""
def __init__(self, root = None):
" root : client instance to flatten "
self.tankid = TankId(0) # next TankId to use
# map an object's id to a TankId (temporary)
self.items = {}
# map a TankId to an object
self.ids = {}
# we are done flattening when this list is empty again
self.todo = []
if root is not None:
self.flatten( root )
def __cmp__(self, other):
if not isinstance( other, Tank ):
return cmp( self, other )
# all our state is in ids, which is "flat"
return cmp( self.ids, other.ids )
###########################################
# before pickle
def flat_list(self, item):
for i in range(len(item)):
item[i] = self.queue( item[i] )
def flat_set(self, item):
newitem = set()
for elem in item:
newitem.add( self.queue( elem ) )
item.clear()
item.update(newitem)
def flat_dict(self, item):
for key in item.keys():
item[key] = self.queue( item[key] )
def flat_attrs(self, item):
for key in item.__dict__.keys():
item.__dict__[key] = self.queue( item.__dict__[key] )
def queue(self, item):
assert not isinstance( item, TankId )
if not ( isinstance(item, list) or isinstance(item, set) or isinstance(item, dict) or hasattr(item, "__dict__") ):
return item
if id(item) not in self.items:
self.todo.append(item)
tankid = self.tankid
self.items[ id(item) ] = tankid
self.ids[ tankid ] = item
self.tankid = TankId(tankid + 1)
else:
tankid = self.items[ id(item) ]
return tankid
def flatten(self, root):
" flatten the root object and all it's references "
self.root = root
self.queue( self.root )
while self.todo:
item = self.todo.pop(0)
if isinstance(item, list):
self.flat_list(item)
if isinstance(item, set):
self.flat_set(item)
if isinstance(item, dict):
self.flat_dict(item)
if hasattr(item, "__dict__"):
self.flat_attrs(item)
# tuple ? other immutables ? __set/getstate__ ?
# flush items (do not pickle them)
self.items = {}
###########################################
# after pickle
def lift_list(self, item):
for i in range(len(item)):
item[i] = self.recover( item[i] )
def lift_set(self, item):
newitem = set()
for elem in item:
newitem.add( self.recover( elem ) )
item.clear()
item.update(newitem)
def lift_dict(self, item):
for key in item.keys():
item[key] = self.recover( item[key] )
def lift_attrs(self, item):
for key in item.__dict__.keys():
item.__dict__[key] = self.recover( item.__dict__[key] )
def recover(self, item):
if isinstance( item, TankId ):
item = self.ids[ item ]
assert not isinstance( item, TankId )
return item
def lift(self):
" restore the root object to it's un-flattened state "
for item in self.ids.values():
if isinstance(item, list):
self.lift_list(item)
if isinstance(item, set):
self.lift_set(item)
if isinstance(item, dict):
self.lift_dict(item)
if hasattr(item, "__dict__"):
self.lift_attrs(item)
# tuple ? other immutables ? __set/getstate__ ?
return self.root
###################################
# for testing
def recursive( N ):
recursive = [ [] for i in range(N) ]
for i in range(N):
for j in range(N):
recursive[i].append( recursive[j] )
return recursive
def nested( N ):
root = []
nest = root
for i in range(N):
nest.append( [] )
nest = nest[0]
return root
|
Both deepcopy and pickle use the python stack to recursively search object stores. This means they do not scale. The comparison operators handle deeply nested structures (before python2.4) but they fail entirely on some (even small) recursive data.
>>> from copy import deepcopy
>>> a = nested(4096) # big and nasty
>>> b = deepcopy(a)
RuntimeError: maximum recursion depth exceeded
>>> b = nested(4096) # ok make a copy ourselves...
>>> a==b # should be true
Traceback (most recent call last):
File "", line 1, in ?
RuntimeError: maximum recursion depth exceeded in cmp
>>> a = recursive(2) # something else now, much smaller
>>> a
[[,
[,
]],
[[,
],
]]
>>> b = deepcopy(a) # works this time
>>> a==b # fails for even this small example
Traceback (most recent call last):
File "", line 1, in ?
RuntimeError: maximum recursion depth exceeded in cmp
We could try using sys.setrecursionlimit, but that is limited by the platform stack size.
Now we use the Tank to "flatten" a:
>>> a = nested(4096)
>>> a_tank = Tank( a )
>>> b_tank = deepcopy( a_tank ) # copy works now
>>> a_tank == b_tank # cmp works
True
>>>
Can pickle / unpickle a_tank here.
>>> a = a_tank.lift() # restore object to it's original state
>>>
A more complete implementation might handle replacing tuples in-place with their flat versions (this is difficult to do in a scalable way), and perhaps use the __getstate__/__setstate__ protocol.
No set support. This library is not able to flatten sets. Easy fix should help!