Welcome, guest | Sign In | My Account | Store | Cart
import array
import collections
import itertools
import pprint

# Placeholder constants
FREE
= -1
DUMMY
= -2

def round_upto_powtwo(n):
   
'Round-up to the next power of two'
    result
= 1
   
while result <= n:
        result
<<= 1
   
return result

class Dict(collections.MutableMapping):
   
'''Space efficient dictionary with fast iteration
       and cheap resizes (minimum data movement)
    '''


   
@staticmethod
   
def _gen_probes(hashvalue, n):
       
'Same sequence of probes used in the current dictionary design'
        PERTURB_SHIFT
= 5
       
if hashvalue < 0:
            hashvalue
= int(2 ** 64 - hashvalue)
        i
= hashvalue & (n-1)
       
yield int(i)
        perturb
= hashvalue
       
while True:
            i
= ((i << 2) + i + perturb + 1)
           
yield int(i & (n-1))
            perturb
>>= PERTURB_SHIFT

   
def _lookup(self, key, hashvalue):
       
'Same lookup logic as currently used in real dicts'
       
assert len(self.keylist) < len(self.indices)   # At least one open slot
        freeslot
= None
       
for i in self._gen_probes(hashvalue, len(self.indices)):
           
if self.indices[i] is FREE:
               
if freeslot is not None:
                    i
= freeslot
               
return (False, i)
           
elif self.indices[i] is DUMMY:
                freeslot
= i
           
else:
                index
= self.indices[i]
               
if (self.keylist[index] is key or
                   
self.hashlist[index] == hashvalue
                   
and self.keylist[index] == key):
                   
return (True, i)

   
def _next_open_index(self):
       
'''New logic:  Store hash/key/value entries in a dense list.
           Append a new entry to the end of the list. '''

       
assert self.size == len(self.keylist)
        index
= len(self.keylist)
       
self.hashlist.append(None)
       
self.keylist.append(None)
       
self.valuelist.append(None)
       
return index

   
@staticmethod
   
def _make_index(n):
       
'New sequence of indices using the smallest possible datatype'
        typecode
= 'b' if n <= 128 else 'h' if n <= 65536 else 'l'
       
return array.array(typecode, itertools.repeat(FREE, n))

   
def _resize(self, n):
       
'''Reindex the existing hash/key/value entries.
           Entries do not get moved, they only get new indices.
           No calls are made to hash() or __eq__().

        '''

        n
= round_upto_powtwo(n)
       
self.indices = self._make_index(n)
       
for index, hashvalue in enumerate(self.hashlist):
           
for i in Dict._gen_probes(hashvalue, n):
               
if self.indices[i] is FREE:
                   
break
           
self.indices[i] = index

   
def __init__(self, *args, **kwds):
       
self.indices = self._make_index(8)
       
self.hashlist = []
       
self.keylist = []
       
self.valuelist = []
       
self.size = 0
       
self.update(*args, **kwds)

   
def __setitem__(self, key, value):
        hashvalue
= hash(key)
        found
, i = self._lookup(key, hashvalue)
       
if found:
            index
= self.indices[i]
           
self.valuelist[index] = value
       
else:
            index
= self._next_open_index()
           
self.indices[i] = index
           
self.hashlist[index] = hashvalue
           
self.keylist[index] = key
           
self.valuelist[index] = value
           
self.size += 1
           
if self.indices.count(FREE) * 3 < len(self.indices):
               
self._resize(2 * len(self))

   
def __getitem__(self, key):
        hashvalue
= hash(key)
        found
, i = self._lookup(key, hashvalue)
       
if found:
            index
= self.indices[i]
           
return self.valuelist[index]
       
else:
           
raise KeyError(key)

   
def __delitem__(self, key):
        hashvalue
= hash(key)
        found
, i = self._lookup(key, hashvalue)
       
if found:
            index
= self.indices[i]
           
self.indices[i] = DUMMY
           
self.size -= 1
           
if index != self.size:
                lasthash
= self.hashlist[-1]
                lastkey
= self.keylist[-1]
                lastvalue
= self.valuelist[-1]
                found
, j = self._lookup(lastkey, lasthash)
               
assert found and i != j
               
self.indices[j] = index
               
self.hashlist[index] = lasthash
               
self.keylist[index] = lastkey
               
self.valuelist[index] = lastvalue
           
self.hashlist.pop()
           
self.keylist.pop()
           
self.valuelist.pop()
       
else:
           
raise KeyError(key)

   
def __iter__(self):
       
for key in self.keylist:
           
yield key

   
def __len__(self):
       
return self.size

   
def __repr__(self):
       
return 'Dict(%r)' % self.items()

   
###### Diagnostic methods.  Not part of the API ##############

   
def show_structure(self):
       
print '=' * 50
       
print 'Dict:', self
       
print 'Size in bytes:', self.memory()
       
print 'Indices:', self.indices
        entries
= itertools.izip_longest(
                        xrange
(len(self.hashlist)),
                       
self.hashlist, self.keylist, self.valuelist,
                        fillvalue
= ':ERROR:')
        pprint
.pprint(list(entries))
       
print '-' * 50

   
def memory(self):
       
'Size in bytes assuming a 64-bit build'
       
return (len(self.indices) * self.indices.itemsize
               
+ len(self.hashlist) * 8 * 3)

if __name__ == '__main__':

    d
= Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
    d
.show_structure()

Diff to Previous Revision

--- revision 5 2012-12-11 04:57:48
+++ revision 6 2012-12-11 05:01:33
@@ -69,7 +69,7 @@
 
     
def _resize(self, n):
         
'''Reindex the existing hash/key/value entries.
-           Entries do not get moved, the only get new indices.
+           Entries do not get moved, they only get new indices.
            No calls are made to hash() or __eq__().
 
         '''

History