A simple implementation of a dictionary which always (when applicable) returns keys, values, items (key-value pairs) sorted by keys (inserting/removing order doesn't matter and only keys are important; so please note that it is something different than OrderedDict in Python 3.1/2.7 or Django's SortedDict).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 | #!/usr/bin/env python
#
# sorteddict.py
# Sorted dictionary (implementation for Python 2.x)
#
# Copyright (c) 2010 Jan Kaliszewski (zuo)
#
# The MIT License:
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from bisect import bisect_left, insort
from itertools import izip, repeat
def dictdoc(method):
"A decorator making reuse of the ordinary dict's docstrings more concise."
dict_method = getattr(dict, method.__name__)
if hasattr(dict_method, '__doc__'):
method.__doc__ = dict_method.__doc__
return method
class SortedDict(dict):
'''Dictionary with sorted keys.
The interface is similar to the ordinary dict's one, but:
* methods: __repr__(), __str__(), __iter__(), iterkeys(), itervalues(),
iteritems(), keys(), values(), items() and popitem() -- return results
taking into consideration sorted keys order;
* new methods: largest_key(), largest_item(), smallest_key(),
smallest_item() added.
'''
def __init__(self, *args, **kwargs):
'''Like with the ordinary dict: from a mapping, from an iterable
of (key, value) pairs, or from keyword arguments.'''
dict.__init__(self, *args, **kwargs)
self._sorted_keys = sorted(dict.iterkeys(self))
@dictdoc
def __repr__(self):
return 'SortedDict({%s})' % ', '.join('%r: %r' % item
for item in self.iteritems())
@dictdoc
def __str__(self):
return repr(self)
@dictdoc
def __setitem__(self, key, value):
key_is_new = key not in self
dict.__setitem__(self, key, value)
if key_is_new:
insort(self._sorted_keys, key)
@dictdoc
def __delitem__(self, key):
dict.__delitem__(self, key)
del self._sorted_keys[bisect_left(self._sorted_keys, key)]
def __iter__(self, reverse=False):
'''D.__iter__() <==> iter(D) <==> D.iterkeys() -> an iterator over
sorted keys (add reverse=True for reverse ordering).'''
if reverse:
return reversed(self._sorted_keys)
else:
return iter(self._sorted_keys)
iterkeys = __iter__
def itervalues(self, reverse=False):
'''D.itervalues() -> an iterator over values sorted by keys
(add reverse=True for reverse ordering).'''
return (self[key] for key in self.iterkeys(reverse))
def iteritems(self, reverse=False):
'''D.iteritems() -> an iterator over (key, value) pairs sorted by keys
(add reverse=True for reverse ordering).'''
return ((key, self[key]) for key in self.iterkeys(reverse))
def keys(self, reverse=False):
'''D.keys() -> a sorted list of keys
(add reverse=True for reverse ordering).'''
return list(self.iterkeys(reverse))
def values(self, reverse=False):
'''D.values() -> a list of values sorted by keys
(add reverse=True for reverse ordering).'''
return list(self.itervalues(reverse))
def items(self, reverse=False):
'''D.items() -> a list of (key, value) pairs sorted by keys
(add reverse=True for reverse ordering).'''
return list(self.iteritems(reverse))
@dictdoc
def clear(self):
dict.clear(self)
del self._sorted_keys[:]
def copy(self):
'''D.copy() -> a shallow copy of D (still as a SortedDict).'''
return self.__class__(self)
@classmethod
@dictdoc
def fromkeys(cls, seq, value=None):
return cls(izip(seq, repeat(value)))
@dictdoc
def pop(self, key, *args, **kwargs):
if key in self:
del self._sorted_keys[bisect_left(self._sorted_keys, key)]
return dict.pop(self, key, *args, **kwargs)
def popitem(self):
'''D.popitem() -> (k, v). Remove and return a (key, value) pair with
the largest key; raise KeyError if D is empty.'''
try:
key = self._sorted_keys.pop()
except IndexError:
raise KeyError('popitem(): dictionary is empty')
else:
return key, dict.pop(self, key)
@dictdoc
def setdefault(self, key, default=None):
if key not in self:
insort(self._sorted_keys, key)
return dict.setdefault(self, key, default)
@dictdoc
def update(self, other=()):
if hasattr(other, 'keys') and hasattr(other, 'values'):
# mapping
newkeys = [key for key in other if key not in self]
else:
# iterator/seqence of pairs
other = list(other)
newkeys = [key for key, _ in other if key not in self]
dict.update(self, other)
for key in newkeys:
insort(self._sorted_keys, key)
def largest_key(self):
'''D.largest_key() -> the largest key; raise KeyError if D is empty.'''
try:
return self._sorted_keys[-1]
except IndexError:
raise KeyError('largest_key(): dictionary is empty')
def largest_item(self):
'''D.largest_item() -> a (key, value) pair with the largest key;
raise KeyError if D is empty.'''
key = self.largest_key()
return key, self[key]
def smallest_key(self):
'''D.smallest_key() -> the smallest key; raise KeyError if D is empty.'''
try:
return self._sorted_keys[0]
except IndexError:
raise KeyError('smallest_key(): dictionary is empty')
def smallest_item(self):
'''D.smallest_item() -> a (key, value) pair with the smallest key;
raise KeyError if D is empty.'''
key = self.smallest_key()
return key, self[key]
|
Implemented using an additional "synchronized" list of keys, maintained with functions from bisect
module. Maintaining parallely two containers uses a bit more memory than the original dict
and (what's more important) slows down modifying operations (setting, deleting, updating...) -- but keeps "getting" lookups (d[x], get(x), x in d...) as fast as when using original dict
. (Another approach could be e.g. to get rid of the original dict
's mechanism, basing only on bisect
functions -- it'd maybe make modifying operations faster but would distincly slow down "getting" lookups; other approaches -- see links below...).
UPDATED:
reverse
keyword argument for [iter
]keys
|values
|items()
methods added.- Methods:
largest_key()
,largest_item()
,smallest_key()
,smallest_item()
added. - Docstrings and auxiliary
dictdoc
decorator added.
TODO:
key
option (like insorted()
,min()
,max()
etc.).
See also (found @PyPI):
You might also be interested in the "sorteddict" type in my blist package. It solves the same problem, but it efficiently keeps the keys in sorted order, yielding different performance characteristics. For example, d.keys()[0] is O(1).
Links:
Like the blist module, the sortedcontainers module provides a SortedDict data type that maintains the keys in sorted order. It's written in pure-Python, very fast (fast-as-C implementations, faster than blist), fully tested (100% coverage and stress) and documented (with performance comparison). Learn more about sortedcontainers, available on PyPI and github.