import array
import collections
import itertools
import pprint
# Placeholder constants
FREE = -1
DUMMY = -2
NOT_FOUND = -3
class Dict(collections.MutableMapping):
'Space efficient dictionary with fast iteration and cheaper resizes.'
@staticmethod
def _gen_probes(hashvalue, n):
'Same sequence of probes used in the current dictionary design'
PERTURB_SHIFT = 5
if hashvalue < 0:
hashvalue = int(2 ** 64 - hashvalue)
i = hashvalue & (n-1)
yield int(i)
perturb = hashvalue
while True:
i = 5 * i + perturb + 1
yield int(i & (n-1))
perturb >>= PERTURB_SHIFT
def _lookup(self, key, hashvalue):
'Same lookup logic as currently used in real dicts'
assert len(self.keylist) < len(self.indices) # At least one open slot
freeslot = None
for i in self._gen_probes(hashvalue, len(self.indices)):
index = self.indices[i]
if index == FREE:
if freeslot is not None:
i = freeslot
return (NOT_FOUND, i)
elif index == DUMMY:
freeslot = i
elif (self.keylist[index] is key or
self.hashlist[index] == hashvalue
and self.keylist[index] == key):
return (index, i)
@staticmethod
def _make_index(n):
'New sequence of indices using the smallest possible datatype'
if n <= 2**7: return array.array('b', [FREE]) * n # signed char
if n <= 2**15: return array.array('h', [FREE]) * n # signed short
if n <= 2**31: return array.array('l', [FREE]) * n # signed long
return [FREE] * n # python integers
def _resize(self, n):
'''Reindex the existing hash/key/value entries.
Entries do not get moved, they only get new indices.
No calls are made to hash() or __eq__().
'''
n = 2 ** n.bit_length()
self.indices = self._make_index(n)
for index, hashvalue in enumerate(self.hashlist):
for i in Dict._gen_probes(hashvalue, n):
if self.indices[i] == FREE:
break
self.indices[i] = index
def __init__(self, *args, **kwds):
self.indices = self._make_index(8)
self.hashlist = []
self.keylist = []
self.valuelist = []
self.size = 0
self.update(*args, **kwds)
def __getitem__(self, key):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index == NOT_FOUND:
raise KeyError(key)
return self.valuelist[index]
def __setitem__(self, key, value):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index == NOT_FOUND:
self.indices[i] = self.size
self.hashlist.append(hashvalue)
self.keylist.append(key)
self.valuelist.append(value)
self.size += 1
if self.indices.count(FREE) * 3 < len(self.indices):
self._resize(2 * len(self))
else:
self.valuelist[index] = value
def __delitem__(self, key):
hashvalue = hash(key)
index, i = self._lookup(key, hashvalue)
if index == NOT_FOUND:
raise KeyError(key)
self.indices[i] = DUMMY
self.size -= 1
# If needed, swap with the lastmost entry to avoid leaving a "hole"
if index != self.size:
lasthash = self.hashlist[-1]
lastkey = self.keylist[-1]
lastvalue = self.valuelist[-1]
lastindex, j = self._lookup(lastkey, lasthash)
assert lastindex != NOT_FOUND and i != j
self.indices[j] = index
self.hashlist[index] = lasthash
self.keylist[index] = lastkey
self.valuelist[index] = lastvalue
# Remove the lastmost entry
self.hashlist.pop()
self.keylist.pop()
self.valuelist.pop()
def __iter__(self):
for key in self.keylist:
yield key
def __len__(self):
return self.size
def __repr__(self):
return 'Dict(%r)' % self.items()
def show_structure(self):
'Diagnostic method. Not part of the API.'
print '=' * 50
print self
print 'Indices:', self.indices
entries = itertools.izip_longest(
xrange(len(self.hashlist)),
self.hashlist, self.keylist, self.valuelist,
fillvalue = ':ERROR:')
pprint.pprint(list(entries))
print '-' * 50
if __name__ == '__main__':
d = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
d.show_structure()
Diff to Previous Revision
--- revision 10 2012-12-17 07:20:59
+++ revision 11 2012-12-18 07:18:48
@@ -6,6 +6,7 @@
# Placeholder constants
FREE = -1
DUMMY = -2
+NOT_FOUND = -3
class Dict(collections.MutableMapping):
'Space efficient dictionary with fast iteration and cheaper resizes.'
@@ -29,18 +30,17 @@
assert len(self.keylist) < len(self.indices) # At least one open slot
freeslot = None
for i in self._gen_probes(hashvalue, len(self.indices)):
- if self.indices[i] == FREE:
+ index = self.indices[i]
+ if index == FREE:
if freeslot is not None:
i = freeslot
- return (False, i)
- elif self.indices[i] == DUMMY:
+ return (NOT_FOUND, i)
+ elif index == DUMMY:
freeslot = i
- else:
- index = self.indices[i]
- if (self.keylist[index] is key or
- self.hashlist[index] == hashvalue
- and self.keylist[index] == key):
- return (True, i)
+ elif (self.keylist[index] is key or
+ self.hashlist[index] == hashvalue
+ and self.keylist[index] == key):
+ return (index, i)
@staticmethod
def _make_index(n):
@@ -48,7 +48,7 @@
if n <= 2**7: return array.array('b', [FREE]) * n # signed char
if n <= 2**15: return array.array('h', [FREE]) * n # signed short
if n <= 2**31: return array.array('l', [FREE]) * n # signed long
- return [FREE] * n
+ return [FREE] * n # python integers
def _resize(self, n):
'''Reindex the existing hash/key/value entries.
@@ -72,13 +72,17 @@
self.size = 0
self.update(*args, **kwds)
+ def __getitem__(self, key):
+ hashvalue = hash(key)
+ index, i = self._lookup(key, hashvalue)
+ if index == NOT_FOUND:
+ raise KeyError(key)
+ return self.valuelist[index]
+
def __setitem__(self, key, value):
hashvalue = hash(key)
- found, i = self._lookup(key, hashvalue)
- if found:
- index = self.indices[i]
- self.valuelist[index] = value
- else:
+ index, i = self._lookup(key, hashvalue)
+ if index == NOT_FOUND:
self.indices[i] = self.size
self.hashlist.append(hashvalue)
self.keylist.append(key)
@@ -86,38 +90,31 @@
self.size += 1
if self.indices.count(FREE) * 3 < len(self.indices):
self._resize(2 * len(self))
-
- def __getitem__(self, key):
- hashvalue = hash(key)
- found, i = self._lookup(key, hashvalue)
- if found:
- index = self.indices[i]
- return self.valuelist[index]
else:
- raise KeyError(key)
+ self.valuelist[index] = value
def __delitem__(self, key):
hashvalue = hash(key)
- found, i = self._lookup(key, hashvalue)
- if found:
- index = self.indices[i]
- self.indices[i] = DUMMY
- self.size -= 1
- if index != self.size:
- lasthash = self.hashlist[-1]
- lastkey = self.keylist[-1]
- lastvalue = self.valuelist[-1]
- found, j = self._lookup(lastkey, lasthash)
- assert found and i != j
- self.indices[j] = index
- self.hashlist[index] = lasthash
- self.keylist[index] = lastkey
- self.valuelist[index] = lastvalue
- self.hashlist.pop()
- self.keylist.pop()
- self.valuelist.pop()
- else:
+ index, i = self._lookup(key, hashvalue)
+ if index == NOT_FOUND:
raise KeyError(key)
+ self.indices[i] = DUMMY
+ self.size -= 1
+ # If needed, swap with the lastmost entry to avoid leaving a "hole"
+ if index != self.size:
+ lasthash = self.hashlist[-1]
+ lastkey = self.keylist[-1]
+ lastvalue = self.valuelist[-1]
+ lastindex, j = self._lookup(lastkey, lasthash)
+ assert lastindex != NOT_FOUND and i != j
+ self.indices[j] = index
+ self.hashlist[index] = lasthash
+ self.keylist[index] = lastkey
+ self.valuelist[index] = lastvalue
+ # Remove the lastmost entry
+ self.hashlist.pop()
+ self.keylist.pop()
+ self.valuelist.pop()
def __iter__(self):
for key in self.keylist:
@@ -132,7 +129,7 @@
def show_structure(self):
'Diagnostic method. Not part of the API.'
print '=' * 50
- print 'Dict:', self
+ print self
print 'Indices:', self.indices
entries = itertools.izip_longest(
xrange(len(self.hashlist)),