Welcome, guest | Sign In | My Account | Store | Cart
import array
import collections
import itertools
import pprint

# Placeholder constants
FREE = -1
DUMMY = -2
NOT_FOUND = -3

class Dict(collections.MutableMapping):
    'Space efficient dictionary with fast iteration and cheaper resizes.'

    @staticmethod
    def _gen_probes(hashvalue, n):
        'Same sequence of probes used in the current dictionary design'
        PERTURB_SHIFT = 5
        if hashvalue < 0:
            hashvalue = int(2 ** 64 - hashvalue)
        i = hashvalue & (n-1)
        yield int(i)
        perturb = hashvalue
        while True:
            i = 5 * i + perturb + 1
            yield int(i & (n-1))
            perturb >>= PERTURB_SHIFT

    def _lookup(self, key, hashvalue):
        'Same lookup logic as currently used in real dicts'
        assert len(self.keylist) < len(self.indices)   # At least one open slot
        freeslot = None
        for i in self._gen_probes(hashvalue, len(self.indices)):
            index = self.indices[i]
            if index == FREE:
                if freeslot is not None:
                    i = freeslot
                return (NOT_FOUND, i)
            elif index == DUMMY:
                freeslot = i
            elif (self.keylist[index] is key or
                  self.hashlist[index] == hashvalue
                  and self.keylist[index] == key):
                    return (index, i)

    @staticmethod
    def _make_index(n):
        'New sequence of indices using the smallest possible datatype'
        if n <= 2**7: return array.array('b', [FREE]) * n       # signed char
        if n <= 2**15: return array.array('h', [FREE]) * n      # signed short
        if n <= 2**31: return array.array('l', [FREE]) * n      # signed long
        return [FREE] * n                                       # python integers

    def _resize(self, n):
        '''Reindex the existing hash/key/value entries.
           Entries do not get moved, they only get new indices.
           No calls are made to hash() or __eq__().

        '''
        n = 2 ** n.bit_length()
        self.indices = self._make_index(n)
        for index, hashvalue in enumerate(self.hashlist):
            for i in Dict._gen_probes(hashvalue, n):
                if self.indices[i] == FREE:
                    break
            self.indices[i] = index

    def __init__(self, *args, **kwds):
        self.indices = self._make_index(8)
        self.hashlist = []
        self.keylist = []
        self.valuelist = []
        self.size = 0
        self.update(*args, **kwds)

    def __getitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index == NOT_FOUND:
            raise KeyError(key)
        return self.valuelist[index]

    def __setitem__(self, key, value):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index == NOT_FOUND:
            self.indices[i] = self.size
            self.hashlist.append(hashvalue)
            self.keylist.append(key)
            self.valuelist.append(value)
            self.size += 1
            if self.indices.count(FREE) * 3 < len(self.indices):
                self._resize(2 * len(self))
        else:
            self.valuelist[index] = value

    def __delitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index == NOT_FOUND:
            raise KeyError(key)
        self.indices[i] = DUMMY
        self.size -= 1
        # If needed, swap with the lastmost entry to avoid leaving a "hole"
        if index != self.size:
            lasthash = self.hashlist[-1]
            lastkey = self.keylist[-1]
            lastvalue = self.valuelist[-1]
            lastindex, j = self._lookup(lastkey, lasthash)
            assert lastindex != NOT_FOUND and i != j
            self.indices[j] = index
            self.hashlist[index] = lasthash
            self.keylist[index] = lastkey
            self.valuelist[index] = lastvalue
        # Remove the lastmost entry
        self.hashlist.pop()
        self.keylist.pop()
        self.valuelist.pop()

    def __iter__(self):
        for key in self.keylist:
            yield key

    def __len__(self):
        return self.size

    def __repr__(self):
        return 'Dict(%r)' % self.items()

    def show_structure(self):
        'Diagnostic method.  Not part of the API.'
        print '=' * 50
        print self
        print 'Indices:', self.indices
        entries = itertools.izip_longest(
                        xrange(len(self.hashlist)),
                        self.hashlist, self.keylist, self.valuelist,
                        fillvalue = ':ERROR:')
        pprint.pprint(list(entries))
        print '-' * 50


if __name__ == '__main__':

    d = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
    d.show_structure()

Diff to Previous Revision

--- revision 10 2012-12-17 07:20:59
+++ revision 11 2012-12-18 07:18:48
@@ -6,6 +6,7 @@
 # Placeholder constants
 FREE = -1
 DUMMY = -2
+NOT_FOUND = -3
 
 class Dict(collections.MutableMapping):
     'Space efficient dictionary with fast iteration and cheaper resizes.'
@@ -29,18 +30,17 @@
         assert len(self.keylist) < len(self.indices)   # At least one open slot
         freeslot = None
         for i in self._gen_probes(hashvalue, len(self.indices)):
-            if self.indices[i] == FREE:
+            index = self.indices[i]
+            if index == FREE:
                 if freeslot is not None:
                     i = freeslot
-                return (False, i)
-            elif self.indices[i] == DUMMY:
+                return (NOT_FOUND, i)
+            elif index == DUMMY:
                 freeslot = i
-            else:
-                index = self.indices[i]
-                if (self.keylist[index] is key or
-                    self.hashlist[index] == hashvalue
-                    and self.keylist[index] == key):
-                    return (True, i)
+            elif (self.keylist[index] is key or
+                  self.hashlist[index] == hashvalue
+                  and self.keylist[index] == key):
+                    return (index, i)
 
     @staticmethod
     def _make_index(n):
@@ -48,7 +48,7 @@
         if n <= 2**7: return array.array('b', [FREE]) * n       # signed char
         if n <= 2**15: return array.array('h', [FREE]) * n      # signed short
         if n <= 2**31: return array.array('l', [FREE]) * n      # signed long
-        return [FREE] * n
+        return [FREE] * n                                       # python integers
 
     def _resize(self, n):
         '''Reindex the existing hash/key/value entries.
@@ -72,13 +72,17 @@
         self.size = 0
         self.update(*args, **kwds)
 
+    def __getitem__(self, key):
+        hashvalue = hash(key)
+        index, i = self._lookup(key, hashvalue)
+        if index == NOT_FOUND:
+            raise KeyError(key)
+        return self.valuelist[index]
+
     def __setitem__(self, key, value):
         hashvalue = hash(key)
-        found, i = self._lookup(key, hashvalue)
-        if found:
-            index = self.indices[i]
-            self.valuelist[index] = value
-        else:
+        index, i = self._lookup(key, hashvalue)
+        if index == NOT_FOUND:
             self.indices[i] = self.size
             self.hashlist.append(hashvalue)
             self.keylist.append(key)
@@ -86,38 +90,31 @@
             self.size += 1
             if self.indices.count(FREE) * 3 < len(self.indices):
                 self._resize(2 * len(self))
-
-    def __getitem__(self, key):
-        hashvalue = hash(key)
-        found, i = self._lookup(key, hashvalue)
-        if found:
-            index = self.indices[i]
-            return self.valuelist[index]
         else:
-            raise KeyError(key)
+            self.valuelist[index] = value
 
     def __delitem__(self, key):
         hashvalue = hash(key)
-        found, i = self._lookup(key, hashvalue)
-        if found:
-            index = self.indices[i]
-            self.indices[i] = DUMMY
-            self.size -= 1
-            if index != self.size:
-                lasthash = self.hashlist[-1]
-                lastkey = self.keylist[-1]
-                lastvalue = self.valuelist[-1]
-                found, j = self._lookup(lastkey, lasthash)
-                assert found and i != j
-                self.indices[j] = index
-                self.hashlist[index] = lasthash
-                self.keylist[index] = lastkey
-                self.valuelist[index] = lastvalue
-            self.hashlist.pop()
-            self.keylist.pop()
-            self.valuelist.pop()
-        else:
+        index, i = self._lookup(key, hashvalue)
+        if index == NOT_FOUND:
             raise KeyError(key)
+        self.indices[i] = DUMMY
+        self.size -= 1
+        # If needed, swap with the lastmost entry to avoid leaving a "hole"
+        if index != self.size:
+            lasthash = self.hashlist[-1]
+            lastkey = self.keylist[-1]
+            lastvalue = self.valuelist[-1]
+            lastindex, j = self._lookup(lastkey, lasthash)
+            assert lastindex != NOT_FOUND and i != j
+            self.indices[j] = index
+            self.hashlist[index] = lasthash
+            self.keylist[index] = lastkey
+            self.valuelist[index] = lastvalue
+        # Remove the lastmost entry
+        self.hashlist.pop()
+        self.keylist.pop()
+        self.valuelist.pop()
 
     def __iter__(self):
         for key in self.keylist:
@@ -132,7 +129,7 @@
     def show_structure(self):
         'Diagnostic method.  Not part of the API.'
         print '=' * 50
-        print 'Dict:', self
+        print self
         print 'Indices:', self.indices
         entries = itertools.izip_longest(
                         xrange(len(self.hashlist)),

History