Welcome, guest | Sign In | My Account | Store | Cart

Yet another implementation of an Odict, a dictionary in which the insertion order of items is preserved.

Python, 299 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
"""
odict.py  -- V.1.1, Oct 12 2006, by bearophile.

Dictionary in which the *insertion* order of items is preserved (using an internal double
linked list). In this implementation replacing an existing item keeps it at its original
position.
Requires Python V.2.4 or successive.

Note: I have removed the doctstrings from most methods to make this code shorter for
the cookbook.

Internal representation: values of the dict:
  +-----------+----------+-----------+
  | <pred key | true val | succ key> |
  +-----------+----------+-----------+
The sequence of elements uses as a double linked list. The 'links' are dict keys.
  self.lh and self.lt are the keys of first and last element inseted in the Odict.
In a C reimplementation of this data structure, things can be simplified (and speed
  up) a lot if given a value you can at the same time find its key. With that, you
  can use normal C pointers.

Memory used (Python 2.5):
set(int): 28.2 bytes/element
dict(int:None): 36.2 bytes/element
Odict(int:None): 102 bytes/element

Speed:
- This Odict is about 20-25 times slower than a dict, for insert and del operations.
- del too is O(1).
"""

class _Nil:
    """Class of the 'pointer' to the null key. For internal usage only."""
    def __repr__(self): # Useful for Odict._repr()
        return "nil"

_nil = _Nil() # 'Pointer' to the null key.

class Odict(dict):
    """
    Ordered dict data structure, with O(1) complexity for dict operations
    that modify one element.
    Overwriting values doesn't change their original sequential order."""
    def __init__(self, data=(), **kwds):
        """This doesn't accept keyword initialization as normal dicts to avoid a trap:
        inside a function or method the keyword args are accessible only as a dict,
        without a defined order, so their original order is lost.

        __init__ test, with keyword args:
        >>> Odict(a=1)
        Traceback (most recent call last):
          ...
        TypeError: __init__() of ordered dict takes no keyword arguments to avoid an ordering trap.

        If initialized with a dict the order of elements is undefined!:
        >>> o = Odict({"a":1, "b":2, "c":3, "d":4})
        >>> print o
        {'a': 1, 'c': 3, 'b': 2, 'd': 4}
        """
        if kwds:
            raise TypeError("__init__() of ordered dict takes no keyword arguments to"
                            " avoid an ordering trap.")
        dict.__init__(self)
        self.lh = _nil # Double-linked list header
        self.lt = _nil # Double-linked list tail
        # If you give a normal dict, then the order of elements is undefined
        if hasattr(data, "iteritems"):
            for key, val in data.iteritems():
                self[key] = val
        else:
            for key, val in data:
                self[key] = val

    def __getitem__(self, key):
        return dict.__getitem__(self, key)[1]

    def __setitem__(self, key, val):
        if key in self:
            dict.__getitem__(self, key)[1] = val
        else:
            new = [self.lt, val, _nil]
            dict.__setitem__(self, key, new)
            if self.lt is _nil:
                self.lh = key
            else:
                dict.__getitem__(self, self.lt)[2] = key
            self.lt = key

    def __delitem__(self, key):
        """
        del test 1, removal from empty Odict:
        >>> o = Odict()
        >>> del o["1"]
        Traceback (most recent call last):
          ...
        KeyError: '1'

        del test 2, removal from Odict with one element:
        >>> o = Odict()
        >>> o["1"] = 1
        >>> del o["1"]
        >>> o.lh, o.lt, o, o
        (nil, nil, Odict(), Odict())
        >>> o._repr()
        'Odict low level repr lh,lt,data: nil, nil, {}'

        del test 3, removal firt element of the Odict sequence:
        >>> o = Odict()
        >>> for i in [1,2,3]: o[str(i)] = i
        >>> del o["1"]
        >>> o.lh, o.lt, o
        ('2', '3', Odict([('2', 2), ('3', 3)]))

        del test 4, removal element in the middle of the Odict sequence:
        >>> o = Odict()
        >>> for i in [1,2,3]: o[str(i)] = i
        >>> del o["2"]
        >>> o.lh, o.lt, o
        ('1', '3', Odict([('1', 1), ('3', 3)]))

        del test 5, removal element at the end of the Odict sequence:
        >>> o = Odict()
        >>> for i in [1,2,3]: o[str(i)] = i
        >>> del o["3"]
        >>> o.lh, o.lt, o
        ('1', '2', Odict([('1', 1), ('2', 2)]))
        """
        if key in self:
            pred, _ ,succ= dict.__getitem__(self, key)
            if pred is _nil:
                self.lh = succ
            else:
                dict.__getitem__(self, pred)[2] = succ
            if succ is _nil:
                self.lt = pred
            else:
                dict.__getitem__(self, succ)[0] = pred
            dict.__delitem__(self, key)
        else:
            raise KeyError, key

    def __str__(self):
        pairs = ("%r: %r" % (k, v) for k, v in self.iteritems())
        return "{%s}" % ", ".join(pairs)

    def __repr__(self):
        if self:
            pairs = ("(%r, %r)" % (k, v) for k, v in self.iteritems())
            return "Odict([%s])" % ", ".join(pairs)
        else:
            return "Odict()"

    def get(self, k, x=None):
        if k in self:
            return dict.__getitem__(self, k)[1]
        else:
            return x

    def __iter__(self):
        curr_key = self.lh
        while curr_key is not _nil:
            yield curr_key
            curr_key = dict.__getitem__(self, curr_key)[2]

    iterkeys = __iter__

    def keys(self):
        return list(self.iterkeys())

    def itervalues(self):
        curr_key = self.lh
        while curr_key is not _nil:
            _, val, curr_key = dict.__getitem__(self, curr_key)
            yield val

    def values(self):
        return list(self.itervalues())

    def iteritems(self):
        curr_key = self.lh
        while curr_key is not _nil:
            _, val, next_key = dict.__getitem__(self, curr_key)
            yield curr_key, val
            curr_key = next_key

    def items(self):
        return list(self.iteritems())

    def clear(self):
        dict.clear(self)
        self.lh = _nil
        self.lt = _nil

    def copy(self):
        return self.__class__(self)

    def update(self, data=(), **kwds):
        if kwds:
            raise TypeError("update() of ordered dict takes no keyword arguments"
                            " to avoid an ordering trap.")
        if hasattr(data, "iteritems"):
            for key, val in data.iteritems():
                self[key] = val
        else:
            for key, val in data:
                self[key] = val

    @classmethod
    def fromkeys(cls, seq, value=None):
        new = cls()
        for key in seq:
            new[key] = value
        return new

    def setdefault(self, k, x=None):
        if k in self:
            return self[k]
        else:
            self[k] = x
            return x

    def pop(self, k, x=_nil):
        if k in self:
            val = self[k]
            del self[k]
            return val
        elif x is _nil:
            raise KeyError(k)
        else:
            return x

    def popitem(self):
        if self:
            key = self.lt
            val = dict.__getitem__(self, key)[1]
            self.__delitem__(key)
            return key, val
        else:
            raise KeyError("'popitem(): ordered dictionary is empty'")

    # Some Odict-specific methods ---------------------------------
    def riterkeys(self):
        """To iterate on keys in reversed order."""
        curr_key = self.lt
        while curr_key is not _nil:
            yield curr_key
            curr_key = dict.__getitem__(self, curr_key)[0]

    __reversed__ = riterkeys

    def rkeys(self):
        """List of the keys in reversed order."""
        return list(self.riterkeys())

    def ritervalues(self):
        """To iterate on values in reversed order."""
        curr_key = self.lt
        while curr_key is not _nil:
            curr_key, val, _ = dict.__getitem__(self, curr_key)
            yield val

    def rvalues(self):
        """List of the values in reversed order."""
        return list(self.ritervalues())

    def riteritems(self):
        """To iterate on (key, value) in reversed order."""
        curr_key = self.lt
        while curr_key is not _nil:
            pred_key, val, _ = dict.__getitem__(self, curr_key)
            yield curr_key, val
            curr_key = pred_key

    def ritems(self):
        """List of the (key, value) in reversed order."""
        return list(self.riteritems())

    def firstkey(self):
        if self:
            return self.lh
        else:
            raise KeyError("'firstkey(): ordered dictionary is empty'")

    def lastkey(self):
        if self:
            return self.lt
        else:
            raise KeyError("'lastkey(): ordered dictionary is empty'")

    def _repr(self):
        """_repr(): low level repr of the whole data contained in the Odict.
        Useful for debugging."""
        form = "Odict low level repr lh,lt,data: %r, %r, %s"
        return form % (self.lh, self.lt, dict.__repr__(self))

if __name__ == "__main__":
    import doctest
    doctest.testmod()
    print "Tests done."

I have removed the doctstrings from most methods to make this code shorter for the cookbook. get, set and del operations are O(1). I don't know if Odict (with the same operation complexity) can be written in Python much faster than this, this used double-linked lists of keys, that are quite slow to manage. A C implementation can be quite faster.

Created by bearophile - on Wed, 11 Oct 2006 (PSF)
Python recipes (4591)
bearophile -'s recipes (15)

Required Modules

Other Information and Tasks