default dictionary with collision function to handle key collisions.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | #!/usr/bin/env python
# This file is licensed under the GNU General Public License found at <http://www.gnu.org/licenses>
# email: Mark Janssen <dreamingforward@gmail.com>
"""Dictionary with default values, collision function, and sorted string output."""
import exceptions
import copy
class KeyAlreadyExists(exceptions.LookupError): pass
class _use_default_:
"""Dummy class used as value in function parameters to indicate
no default value specified; i.e. use the value in DefDict._default.
Created special class to avoid clashing with possible value passed
by user."""
#XXX perhaps should have instantiate default which takes arbitrary
# number of parameters that will be passed to value stored in DefDict._default
# to allow object creation. DefDict.add would check if isinstance(default, _use_default_)
#define some useful collision functions
#May wish to return an expression instead of setting ddict directly
# to allow lambda functions to be passed as collision functions.
# may sacrifice speed for cases when value modified in place or when old value lookup not needed.
_OVERWRITE_ = None #i.e. will use standard dict overwrite semantics
def _RETAIN_(ddict, key, new_value): pass #do nothing to the value
def _RAISE_(ddict, key, new_value): raise KeyAlreadyExists, repr(key)
def _ADD_(ddict, key, new_value): ddict[key] += new_value
def _MAX_(ddict, key, new_value): ddict[key] = max(ddict[key], new_value)
def _MIN_(ddict, key, new_value): ddict[key] = min(ddict[key], new_value)
def _OUTPUT_KEY_(ddict, key, new_value): print key #should probably send to stderr
class DefDict(dict):
"""Extends standard dictionary type by allowing user to
specify a default value when a key is inserted.
A 'collision' function can be provided to specify what should
be done to the value when a key is added that already exists.
User-defined collision function should take
(defdict, key, new_value) as parameters.
"""
#XXX may wish to have collision method instead of constantly passing as parameter
__slots__ = ['_default']
def __init__(self, init={}, default=None, collision=_OVERWRITE_):
"""Create dictionary and initialize with mapping or list of (key, value) pairs, if given.
Sets a default value for the keys when no other specified.
Initialization interface similar to standard dictionary:
>>> dd = DefDict() #create empty DefDict
>>> print dd
{}
>>> print DefDict({1: 1, 2: 4, 3: 9}) #initialize with dict type
{1: 1, 2: 4, 3: 9}
Initialize with list of (key, value) pairs:
>>> print DefDict([('a', 2), ('b', 3), ('c', 1), ('a', 5)], collision=_ADD_)
{'a': 7, 'b': 3, 'c': 1}
A sequence of (key, value) pairs may contain duplicate keys, like above.
The resulting value of such "key collisions" can be specified
by providing a collision function. The collision function will
be called with parameters (self, key, new_value) and should set the
value of self[key] as desired.
This module defines a few useful collision functions:
_OVERWRITE_: the default--standard dictionary semantics;
i.e. if key already exists, new value overwrites existing
key value.
_RETAIN_: value remains unchanged if key already exists.
_RAISE_EXCEPTION_: raise 'KeyAlreadyExists' exception if key already
exists. Value remains unchanged.
_ADD_: sets value to existing value + new value
_MAX_: sets to greater of old, new values
_MIN_: sets to lesser of old, new values
"""
self._default = default
dict.__init__(self)
if isinstance(init, dict): #don't use dict(init) since derives classes have special setitem behavior
init = init.iteritems()
#list of (key, value) pairs may contain duplicates
for key, value in init:
self.setdefault(key, value, collision)
def fromkeys(cls, iterable, default=None, collision=_OVERWRITE_):
"""Create dictionary from iterable with optional default value.
One can initialize defdict with a sequence of keys.
The dictionary values of the keys will be determined by the default value,
if given, otherwise defaults to None.
>>> print DefDict.fromkeys(["apple", "banana", "carrot"])
{'apple': None, 'banana': None, 'carrot': None}
>>> dd = DefDict.fromkeys(range(4), 0) #initialize with values = 0
>>> print dd
{0: 0, 1: 0, 2: 0, 3: 0}
>>> dd.update([0]) #default value retained?
>>> print dd[0] #Yes if output is 1
1
>>> print DefDict.fromkeys("abacab", 1, _ADD_)
{'a': 3, 'b': 2, 'c': 1}
"""
dd = cls(default=default)
for key in iterable:
dd.setdefault(key, default, collision)
return dd
fromkeys = classmethod(fromkeys)
def update(self, other, collision=_OVERWRITE_):
"""Updates defdict from mapping type or iterable with optional collision function.
>>> d = DefDict.fromkeys('ab', 1)
>>> d.update({'a': 0, 'c': 2})
>>> print d
{'a': 0, 'b': 1, 'c': 2}
As seen above, when updating a key which already exists and no
collision function is specified, update defaults to standard dictionary
OVERWRITE semantics. This behavior can be modified by passing a
collision function.
>>> d.update({'b': 3, 'd': 9}, _ADD_)
>>> print d
{'a': 0, 'b': 4, 'c': 2, 'd': 9}
>>> d._default = 5 #manually change default value
>>> d.update(['b','c']) #update from non-dict
>>> d._default = 1 #set back to original
>>> d.update('abd') #string are iterated
>>> print d
{'a': 1, 'b': 1, 'c': 5, 'd': 1}
>>> d.update('de', _ADD_)
>>> print d
{'a': 1, 'b': 1, 'c': 5, 'd': 2, 'e': 1}
NOTE: If collision function raises an exception, DefDict may be
left in partially-updated state.
"""
#perhaps should catch any exceptions that may be caused by collision
# and store aberrent keys in the exception to be reported later.
if isinstance(other, dict):
for key, value in other.iteritems():
self.setdefault(key, value, collision)
else: #given iterable
for key in other:
self.setdefault(key, self._default, collision)
def setdefault(self, key, value = _use_default_, collision=_RETAIN_):
"""Behaves like standard dict.setdefault, but uses value in _default attribute
if no default parameter specified.
>>> dd = DefDict(default=5)
>>> dd.setdefault('a', 10), dd.setdefault('b'), dd.setdefault('b', 11)
(10, 5, 5)
>>> print dd
{'a': 10, 'b': 5}
A collision function can also be passed to override setdefault's
standard RETAIN semantics.
>>> dd.setdefault('a', collision=_OVERWRITE_), dd.setdefault('b', 6, _ADD_)
(5, 11)
>>> dd.setdefault('b', 12, _MAX_), dd.setdefault('b', 10, _MAX_)
(12, 12)
>>> dd.setdefault('c', 10, _RAISE_)
10
>>> dd.setdefault('c', 10, _RAISE_)
Traceback (most recent call last):
KeyAlreadyExists: 'c'
>>> print dd
{'a': 5, 'b': 12, 'c': 10}
Default value is NOT copied if non-simple type (ex. list, dict).
If values must be distinct objects, then you must subclass and
override this method or __setitem__() to create a copy of the default.
>>> dd = DefDict(default=[])
>>> dd.setdefault(1), dd.setdefault(2)
([], [])
>>> dd[1] is dd[2] #keys 1 and 2 do have distinct list objects
True
>>> dd[2].append(42) #will also change value in dd[1]!!!
>>> print dd
{1: [42], 2: [42]}
"""
key_absent = key not in self #fail immediately if key unhashable
if value == _use_default_: value = self._default #XXX should make copy for non-simple default, or rely on __setitem__() to make copy?
if collision == _OVERWRITE_ or key_absent:
self[key] = value #note: subclass may override setitem method so value may be modified
else:
collision(self, key, value)
return dict.__getitem__(self, key) #may wish to allow dd[key] to insert key in dd with default value
def get(self, key, *args):
"""Behaves like standard dict.get, but uses value in _default attribute
if no default parameter specified.
>>> dd = DefDict({'a': 10}, 0)
>>> dd.get('a'), dd.get('b'), dd.get('b', 11)
(10, 0, 11)
>>> print dd
{'a': 10}
"""
if not args: args = (self._default,)
return dict.get(self, key, *args)
def copy(self):
"""Return shallow copy of dictionary.
>>> dd = DefDict.fromkeys(range(5), 5)
>>> ddcopy = dd.copy()
>>> print ddcopy._default, isinstance(ddcopy, DefDict); print ddcopy
5 True
{0: 5, 1: 5, 2: 5, 3: 5, 4: 5}
>>> ddcopy[0] = 7
>>> print dd[0], ddcopy[0]
5 7
"""
return self.__class__(self, self._default)
__copy__ = copy
def __str__(self):
"""Convert self to string with keys in sorted order.
>>> str(DefDict())
'{}'
>>> str(DefDict({9: 0, 'test': 0, 'a': 0, 0: 0}))
"{0: 0, 9: 0, 'a': 0, 'test': 0}"
"""
if not self: return '{}' #nothing to sort
keys = self.keys()
keys.sort()
return '{' + ', '.join(["%r: %s" % (k, self[k]) for k in keys]) + '}'
if __name__ == "__main__":
import doctest
print doctest.testmod()
|
This is a great solution for making sophisticated data structures that manipulate values of dictionaries in some way. Very easy to make a Counter class for example, or a Graph abstract class.