Save space and improve iteration speed by moving the hash/key/value entries to a densely packed array keeping only a sparse array of indices. This eliminates wasted space without requiring any algorithmic changes.
import array
import collections
import collections.abc
import itertools
# Sentinel values stored in the sparse index table.
FREE = -1    # slot has never held an entry
DUMMY = -2   # slot once held an entry that was later deleted


class Dict(collections.abc.MutableMapping):
    '''Space efficient dictionary with fast iteration and cheap resizes.

    Layout: a dense, append-only set of parallel lists holding the
    (hash, key, value) entries, plus a sparse table of small integer
    indices into those lists.  Only the index table participates in
    probing, so resizes never move entries and iteration runs over the
    dense lists directly.
    '''

    @staticmethod
    def _gen_probes(hashvalue, mask):
        'Same sequence of probes used in the current dictionary design.'
        PERTURB_SHIFT = 5
        if hashvalue < 0:
            hashvalue = -hashvalue
        i = hashvalue & mask
        yield i
        perturb = hashvalue
        while True:
            # Mask to 64 bits to emulate C unsigned wraparound arithmetic.
            i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF
            yield i & mask
            perturb >>= PERTURB_SHIFT

    def _lookup(self, key, hashvalue):
        '''Same lookup logic as currently used in real dicts.

        Returns (index, i) where *index* is the position in the dense
        entry lists (or FREE/DUMMY when the key is absent) and *i* is
        the slot in the sparse index table.
        '''
        assert self.filled < len(self.indices)   # At least one open slot
        freeslot = None
        for i in self._gen_probes(hashvalue, len(self.indices) - 1):
            index = self.indices[i]
            if index == FREE:
                # Key absent; prefer reporting an earlier reusable DUMMY slot.
                return (FREE, i) if freeslot is None else (DUMMY, freeslot)
            elif index == DUMMY:
                if freeslot is None:
                    freeslot = i
            elif (self.keylist[index] is key or
                  self.hashlist[index] == hashvalue
                  and self.keylist[index] == key):
                return (index, i)

    @staticmethod
    def _make_index(n):
        'New sequence of indices using the smallest possible datatype.'
        if n <= 2**7:  return array.array('b', [FREE]) * n   # signed char
        if n <= 2**15: return array.array('h', [FREE]) * n   # signed short
        if n <= 2**31: return array.array('l', [FREE]) * n   # signed long
        return [FREE] * n                                    # python integers

    def _resize(self, n):
        '''Reindex the existing hash/key/value entries.
        Entries do not get moved, they only get new indices.
        No calls are made to hash() or __eq__().
        '''
        n = 2 ** n.bit_length()                  # round-up to power-of-two
        self.indices = self._make_index(n)
        for index, hashvalue in enumerate(self.hashlist):
            for i in Dict._gen_probes(hashvalue, n - 1):
                if self.indices[i] == FREE:
                    break
            self.indices[i] = index
        self.filled = self.used                  # reindexing removes dummies

    def clear(self):
        self.indices = self._make_index(8)
        self.hashlist = []
        self.keylist = []
        self.valuelist = []
        self.used = 0
        self.filled = 0                          # used + dummies

    def __getitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        return self.valuelist[index]

    def __setitem__(self, key, value):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            # New key: append to the dense lists and record its position.
            self.indices[i] = self.used
            self.hashlist.append(hashvalue)
            self.keylist.append(key)
            self.valuelist.append(value)
            self.used += 1
            if index == FREE:
                self.filled += 1
                # Keep the index table no more than two-thirds full.
                if self.filled * 3 > len(self.indices) * 2:
                    self._resize(4 * len(self))
        else:
            self.valuelist[index] = value

    def __delitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        self.indices[i] = DUMMY
        self.used -= 1
        # If needed, swap with the lastmost entry to avoid leaving a "hole"
        if index != self.used:
            lasthash = self.hashlist[-1]
            lastkey = self.keylist[-1]
            lastvalue = self.valuelist[-1]
            lastindex, j = self._lookup(lastkey, lasthash)
            assert lastindex >= 0 and i != j
            self.indices[j] = index
            self.hashlist[index] = lasthash
            self.keylist[index] = lastkey
            self.valuelist[index] = lastvalue
        # Remove the lastmost entry
        self.hashlist.pop()
        self.keylist.pop()
        self.valuelist.pop()

    def __init__(self, *args, **kwds):
        # Guard so a subclass calling __init__ twice won't wipe stored data.
        if not hasattr(self, 'keylist'):
            self.clear()
        self.update(*args, **kwds)

    def __len__(self):
        return self.used

    def __iter__(self):
        return iter(self.keylist)

    def iterkeys(self):
        return iter(self.keylist)

    def keys(self):
        return list(self.keylist)

    def itervalues(self):
        return iter(self.valuelist)

    def values(self):
        return list(self.valuelist)

    def iteritems(self):
        # Python 3 zip() is already lazy (replaces itertools.izip).
        return zip(self.keylist, self.valuelist)

    def items(self):
        return list(zip(self.keylist, self.valuelist))

    def __contains__(self, key):
        index, i = self._lookup(key, hash(key))
        return index >= 0

    def get(self, key, default=None):
        index, i = self._lookup(key, hash(key))
        return self.valuelist[index] if index >= 0 else default

    def popitem(self):
        'Remove and return the most recently stored (key, value) pair.'
        if not self.keylist:
            raise KeyError('popitem(): dictionary is empty')
        key = self.keylist[-1]
        value = self.valuelist[-1]
        del self[key]
        return key, value

    def __repr__(self):
        return 'Dict(%r)' % self.items()

    def show_structure(self):
        'Diagnostic method.  Not part of the API.'
        print('=' * 50)
        print(self)
        print('Indices:', self.indices)
        for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
            print(i, row)
        print('-' * 50)
if __name__ == '__main__':
    # Quick demo: build a small dict and dump its internal layout.
    demo = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
    demo.show_structure()
|
The current memory layout for dictionaries is unnecessarily inefficient. It has a sparse table of 24-byte entries containing the hash value, key pointer, and value pointer.
Instead, the 24-byte entries should be stored in a dense table referenced by a sparse table of indices.
For example, the dictionary:
d = {'timmy': 'red', 'barry': 'green', 'guido': 'blue'}
is currently stored as:
entries = [['--', '--', '--'],
[-8522787127447073495, 'barry', 'green'],
['--', '--', '--'],
['--', '--', '--'],
['--', '--', '--'],
[-9092791511155847987, 'timmy', 'red'],
['--', '--', '--'],
[-6480567542315338377, 'guido', 'blue']]
Instead, the data should be organized as follows:
indices = [None, 1, None, None, None, 0, None, 2]
entries = [[-9092791511155847987, 'timmy', 'red'],
[-8522787127447073495, 'barry', 'green'],
[-6480567542315338377, 'guido', 'blue']]
Only the data layout needs to change. The hash table algorithms would stay the same. All of the current optimizations would be kept, including key-sharing dicts and custom lookup functions for string-only dicts. There is no change to the hash functions, the table search order, or collision statistics.
The memory savings are significant (from 30% to 95% compression depending on how full the table is). Small dicts (size 0, 1, or 2) get the most benefit.
For a sparse table of size t with n entries, the sizes are:
curr_size = 24 * t
new_size = 24 * n + sizeof(index) * t
In the above timmy/barry/guido example, the current size is 192 bytes (eight 24-byte entries) and the new size is 80 bytes (three 24-byte entries plus eight 1-byte indices). That gives 58% compression.
Note, the sizeof(index) can be as small as a single byte for small dicts, two bytes for bigger dicts and up to sizeof(Py_ssize_t) for huge dicts.
In addition to space savings, the new memory layout makes iteration faster. Currently, keys(), values(), and items() loop over the sparse table, skipping-over free slots in the hash table. Now, keys/values/items can loop directly over the dense table, using fewer memory accesses.
Another benefit is that resizing is faster and touches fewer pieces of memory. Currently, every hash/key/value entry is moved or copied during a resize. In the new layout, only the indices are updated. For the most part, the hash/key/value entries never move (except for swaps to fill holes left by a deletion).
With the reduced memory footprint, we can also expect better cache utilization.
YMMV: Keep in mind that the above size statistics assume a build with 64-bit Py_ssize_t and 64-bit pointers. The space savings percentages are a bit different on other builds. Also, note that in many applications, the size of the data dominates the size of the container (i.e. the weight of a bucket of water is mostly the water, not the bucket).
The entries list uses regular Python lists which over-allocate in order to append() efficiently. The growth pattern for regular Python lists is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... For larger lists, this over-allocation is no more than 12.5%.
In summary, the memory and speed performance of the entries table is the same as for regular Python lists -- it has the same iteration speed, growth rate, and append/pop performance.
The performance of the index table is the same as for regular Python dictionaries but is substantially more space efficient (from 3 to 24 times more space efficient than regular dictionaries). The smaller size makes resizing substantially faster and it improves cache performance.
In clear(), why do self.indices = self._make_index(8)? Would the performance hit of allocating the index array only upon the first insertion be significant? The way it is, having many empty dicts carries an extra 8 bytes of overhead per dict for the unused array of indices.
Benchmark results comparing this Dict class against the builtin dict were attached here.
@derek zhou I think you are missing the point. This is not meant to be faster than a dict for insert, update, and delete -- at least not in a pure-Python implementation. It is meant to be more compact and to offer faster looping, and it meets this goal even in pure Python.