Welcome, guest | Sign In | My Account | Store | Cart
import array
import collections
import itertools

# Placeholder constants
FREE = -1
DUMMY = -2

class Dict(collections.MutableMapping):
    'Space efficient dictionary with fast iteration and cheap resizes.'

    @staticmethod
    def _gen_probes(hashvalue, n):
        'Same sequence of probes used in the current dictionary design'
        PERTURB_SHIFT = 5
        if hashvalue < 0:
            hashvalue = -hashvalue
        i = hashvalue & (n-1)
        yield i
        perturb = hashvalue
        while True:
            i = 5 * i + perturb + 1
            yield i & (n-1)
            perturb >>= PERTURB_SHIFT

    def _lookup(self, key, hashvalue):
        'Same lookup logic as currently used in real dicts'
        assert self.filled < len(self.indices)   # At least one open slot
        freeslot = None
        for i in self._gen_probes(hashvalue, len(self.indices)):
            index = self.indices[i]
            if index == FREE:
                return (FREE, i) if freeslot is None else (DUMMY, freeslot)
            elif index == DUMMY:
                freeslot = i
            elif (self.keylist[index] is key or
                  self.hashlist[index] == hashvalue
                  and self.keylist[index] == key):
                    return (index, i)

    @staticmethod
    def _make_index(n):
        'New sequence of indices using the smallest possible datatype'
        if n <= 2**7: return array.array('b', [FREE]) * n       # signed char
        if n <= 2**15: return array.array('h', [FREE]) * n      # signed short
        if n <= 2**31: return array.array('l', [FREE]) * n      # signed long
        return [FREE] * n                                       # python integers

    def _resize(self, n):
        '''Reindex the existing hash/key/value entries.
           Entries do not get moved, they only get new indices.
           No calls are made to hash() or __eq__().

        '''
        n = 2 ** n.bit_length()                     # round-up to power-of-two
        self.indices = self._make_index(n)
        for index, hashvalue in enumerate(self.hashlist):
            for i in Dict._gen_probes(hashvalue, n):
                if self.indices[i] == FREE:
                    break
            self.indices[i] = index
        self.filled = self.used

    def clear(self):
        self.indices = self._make_index(8)
        self.hashlist = []
        self.keylist = []
        self.valuelist = []
        self.used = 0
        self.filled = 0                                         # used + dummies

    def __init__(self, *args, **kwds):
        try:
            self.keylist
        except AttributeError:
            self.clear()
        self.update(*args, **kwds)

    def __getitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        return self.valuelist[index]

    def __setitem__(self, key, value):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            self.indices[i] = self.used
            self.hashlist.append(hashvalue)
            self.keylist.append(key)
            self.valuelist.append(value)
            self.used += 1
            if index == FREE:
                self.filled += 1
                if self.filled * 3 > len(self.indices) * 2:
                    self._resize(4 * len(self))
        else:
            self.valuelist[index] = value

    def __delitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        self.indices[i] = DUMMY
        self.used -= 1
        # Swap with the lastmost entry to avoid leaving a "hole"
        lasthash = self.hashlist[-1]
        lastkey = self.keylist[-1]
        lastvalue = self.valuelist[-1]
        lastindex, j = self._lookup(lastkey, lasthash)
        assert lastindex >= 0
        self.indices[j] = index
        self.hashlist[index] = lasthash
        self.keylist[index] = lastkey
        self.valuelist[index] = lastvalue
        # Remove the lastmost entry
        self.hashlist.pop()
        self.keylist.pop()
        self.valuelist.pop()

    def __len__(self):
        return self.used

    def __iter__(self):
        return iter(self.keylist)

    def iterkeys(self):
        return iter(self.keylist)

    def keys(self):
        return self.keylist

    def itervalues(self):
        return iter(self.valuelist)

    def values(self):
        return self.valuelist

    def items(self):
        return zip(self.keylist, self.valuelist)

    def iteritems(self):
        return itertools.izip(self.keylist, self.valuelist)

    def __contains__(self, key):
        index, i = self._lookup(key, hash(key))
        return index >= 0

    def get(self, key, default=None):
        index, i = self._lookup(key, hash(key))
        return self.valuelist[index] if index >= 0 else default

    def popitem(self):
        try:
            key = self.keylist[-1]
            value = self.valuelist[-1]
        except IndexError:
            raise KeyError( 'popitem(): dictionary is empty')
        del self[key]
        return key, value

    def __repr__(self):
        return 'Dict(%r)' % self.items()

    def show_structure(self):
        'Diagnostic method.  Not part of the API.'
        print '=' * 50
        print self
        print 'Indices:', self.indices
        for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
            print i, row
        print '-' * 50

Diff to Previous Revision

--- revision 15 2012-12-28 07:42:43
+++ revision 16 2012-12-30 03:48:24
@@ -14,18 +14,18 @@
         'Same sequence of probes used in the current dictionary design'
         PERTURB_SHIFT = 5
         if hashvalue < 0:
-            hashvalue = int(2 ** 64 - hashvalue)
+            hashvalue = -hashvalue
         i = hashvalue & (n-1)
-        yield int(i)
+        yield i
         perturb = hashvalue
         while True:
             i = 5 * i + perturb + 1
-            yield int(i & (n-1))
+            yield i & (n-1)
             perturb >>= PERTURB_SHIFT
 
     def _lookup(self, key, hashvalue):
         'Same lookup logic as currently used in real dicts'
-        assert len(self.keylist) < len(self.indices)   # At least one open slot
+        assert self.filled < len(self.indices)   # At least one open slot
         freeslot = None
         for i in self._gen_probes(hashvalue, len(self.indices)):
             index = self.indices[i]
@@ -95,7 +95,7 @@
             if index == FREE:
                 self.filled += 1
                 if self.filled * 3 > len(self.indices) * 2:
-                    self._resize(2 * len(self))
+                    self._resize(4 * len(self))
         else:
             self.valuelist[index] = value
 
@@ -106,17 +106,16 @@
             raise KeyError(key)
         self.indices[i] = DUMMY
         self.used -= 1
-        # If needed, swap with the lastmost entry to avoid leaving a "hole"
-        if index != self.used:
-            lasthash = self.hashlist[-1]
-            lastkey = self.keylist[-1]
-            lastvalue = self.valuelist[-1]
-            lastindex, j = self._lookup(lastkey, lasthash)
-            assert lastindex >= 0 and i != j
-            self.indices[j] = index
-            self.hashlist[index] = lasthash
-            self.keylist[index] = lastkey
-            self.valuelist[index] = lastvalue
+        # Swap with the lastmost entry to avoid leaving a "hole"
+        lasthash = self.hashlist[-1]
+        lastkey = self.keylist[-1]
+        lastvalue = self.valuelist[-1]
+        lastindex, j = self._lookup(lastkey, lasthash)
+        assert lastindex >= 0
+        self.indices[j] = index
+        self.hashlist[index] = lasthash
+        self.keylist[index] = lastkey
+        self.valuelist[index] = lastvalue
         # Remove the lastmost entry
         self.hashlist.pop()
         self.keylist.pop()
@@ -146,6 +145,23 @@
     def iteritems(self):
         return itertools.izip(self.keylist, self.valuelist)
 
+    def __contains__(self, key):
+        index, i = self._lookup(key, hash(key))
+        return index >= 0
+
+    def get(self, key, default=None):
+        index, i = self._lookup(key, hash(key))
+        return self.valuelist[index] if index >= 0 else default
+
+    def popitem(self):
+        try:
+            key = self.keylist[-1]
+            value = self.valuelist[-1]
+        except IndexError:
+            raise KeyError( 'popitem(): dictionary is empty')
+        del self[key]
+        return key, value
+
     def __repr__(self):
         return 'Dict(%r)' % self.items()
 
@@ -157,9 +173,3 @@
         for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
             print i, row
         print '-' * 50
-
-
-if __name__ == '__main__':
-
-    d = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
-    d.show_structure()

History