#!/usr/bin/env python # This file is licensed under the GNU General Public License found at # email: Mark Janssen """Dictionary with default values, collision function, and sorted string output.""" import exceptions import copy class KeyAlreadyExists(exceptions.LookupError): pass class _use_default_: """Dummy class used as value in function parameters to indicate no default value specified; i.e. use the value in DefDict._default. Created special class to avoid clashing with possible value passed by user.""" #XXX perhaps should have instantiate default which takes arbitrary # number of parameters that will be passed to value stored in DefDict._default # to allow object creation. DefDict.add would check if isinstance(default, _use_default_) #define some useful collision functions #May wish to return an expression instead of setting ddict directly # to allow lambda functions to be passed as collision functions. # may sacrifice speed for cases when value modified in place or when old value lookup not needed. _OVERWRITE_ = None #i.e. will use standard dict overwrite semantics def _RETAIN_(ddict, key, new_value): pass #do nothing to the value def _RAISE_(ddict, key, new_value): raise KeyAlreadyExists, repr(key) def _ADD_(ddict, key, new_value): ddict[key] += new_value def _MAX_(ddict, key, new_value): ddict[key] = max(ddict[key], new_value) def _MIN_(ddict, key, new_value): ddict[key] = min(ddict[key], new_value) def _OUTPUT_KEY_(ddict, key, new_value): print key #should probably send to stderr class DefDict(dict): """Extends standard dictionary type by allowing user to specify a default value when a key is inserted. A 'collision' function can be provided to specify what should be done to the value when a key is added that already exists. User-defined collision function should take (defdict, key, new_value) as parameters. """ #XXX may wish to have collision method instead of constantly passing as parameter __slots__ = ['_default'] def __init__(self, init={}, default=None, collision=_OVERWRITE_): """Create dictionary and initialize with mapping or list of (key, value) pairs, if given. Sets a default value for the keys when no other specified. Initialization interface similar to standard dictionary: >>> dd = DefDict() #create empty DefDict >>> print dd {} >>> print DefDict({1: 1, 2: 4, 3: 9}) #initialize with dict type {1: 1, 2: 4, 3: 9} Initialize with list of (key, value) pairs: >>> print DefDict([('a', 2), ('b', 3), ('c', 1), ('a', 5)], collision=_ADD_) {'a': 7, 'b': 3, 'c': 1} A sequence of (key, value) pairs may contain duplicate keys, like above. The resulting value of such "key collisions" can be specified by providing a collision function. The collision function will be called with parameters (self, key, new_value) and should set the value of self[key] as desired. This module defines a few useful collision functions: _OVERWRITE_: the default--standard dictionary semantics; i.e. if key already exists, new value overwrites existing key value. _RETAIN_: value remains unchanged if key already exists. _RAISE_EXCEPTION_: raise 'KeyAlreadyExists' exception if key already exists. Value remains unchanged. _ADD_: sets value to existing value + new value _MAX_: sets to greater of old, new values _MIN_: sets to lesser of old, new values """ self._default = default dict.__init__(self) if isinstance(init, dict): #don't use dict(init) since derives classes have special setitem behavior init = init.iteritems() #list of (key, value) pairs may contain duplicates for key, value in init: self.setdefault(key, value, collision) def fromkeys(cls, iterable, default=None, collision=_OVERWRITE_): """Create dictionary from iterable with optional default value. One can initialize defdict with a sequence of keys. The dictionary values of the keys will be determined by the default value, if given, otherwise defaults to None. >>> print DefDict.fromkeys(["apple", "banana", "carrot"]) {'apple': None, 'banana': None, 'carrot': None} >>> dd = DefDict.fromkeys(range(4), 0) #initialize with values = 0 >>> print dd {0: 0, 1: 0, 2: 0, 3: 0} >>> dd.update([0]) #default value retained? >>> print dd[0] #Yes if output is 1 1 >>> print DefDict.fromkeys("abacab", 1, _ADD_) {'a': 3, 'b': 2, 'c': 1} """ dd = cls(default=default) for key in iterable: dd.setdefault(key, default, collision) return dd fromkeys = classmethod(fromkeys) def update(self, other, collision=_OVERWRITE_): """Updates defdict from mapping type or iterable with optional collision function. >>> d = DefDict.fromkeys('ab', 1) >>> d.update({'a': 0, 'c': 2}) >>> print d {'a': 0, 'b': 1, 'c': 2} As seen above, when updating a key which already exists and no collision function is specified, update defaults to standard dictionary OVERWRITE semantics. This behavior can be modified by passing a collision function. >>> d.update({'b': 3, 'd': 9}, _ADD_) >>> print d {'a': 0, 'b': 4, 'c': 2, 'd': 9} >>> d._default = 5 #manually change default value >>> d.update(['b','c']) #update from non-dict >>> d._default = 1 #set back to original >>> d.update('abd') #string are iterated >>> print d {'a': 1, 'b': 1, 'c': 5, 'd': 1} >>> d.update('de', _ADD_) >>> print d {'a': 1, 'b': 1, 'c': 5, 'd': 2, 'e': 1} NOTE: If collision function raises an exception, DefDict may be left in partially-updated state. """ #perhaps should catch any exceptions that may be caused by collision # and store aberrent keys in the exception to be reported later. if isinstance(other, dict): for key, value in other.iteritems(): self.setdefault(key, value, collision) else: #given iterable for key in other: self.setdefault(key, self._default, collision) def setdefault(self, key, value = _use_default_, collision=_RETAIN_): """Behaves like standard dict.setdefault, but uses value in _default attribute if no default parameter specified. >>> dd = DefDict(default=5) >>> dd.setdefault('a', 10), dd.setdefault('b'), dd.setdefault('b', 11) (10, 5, 5) >>> print dd {'a': 10, 'b': 5} A collision function can also be passed to override setdefault's standard RETAIN semantics. >>> dd.setdefault('a', collision=_OVERWRITE_), dd.setdefault('b', 6, _ADD_) (5, 11) >>> dd.setdefault('b', 12, _MAX_), dd.setdefault('b', 10, _MAX_) (12, 12) >>> dd.setdefault('c', 10, _RAISE_) 10 >>> dd.setdefault('c', 10, _RAISE_) Traceback (most recent call last): KeyAlreadyExists: 'c' >>> print dd {'a': 5, 'b': 12, 'c': 10} Default value is NOT copied if non-simple type (ex. list, dict). If values must be distinct objects, then you must subclass and override this method or __setitem__() to create a copy of the default. >>> dd = DefDict(default=[]) >>> dd.setdefault(1), dd.setdefault(2) ([], []) >>> dd[1] is dd[2] #keys 1 and 2 do have distinct list objects True >>> dd[2].append(42) #will also change value in dd[1]!!! >>> print dd {1: [42], 2: [42]} """ key_absent = key not in self #fail immediately if key unhashable if value == _use_default_: value = self._default #XXX should make copy for non-simple default, or rely on __setitem__() to make copy? if collision == _OVERWRITE_ or key_absent: self[key] = value #note: subclass may override setitem method so value may be modified else: collision(self, key, value) return dict.__getitem__(self, key) #may wish to allow dd[key] to insert key in dd with default value def get(self, key, *args): """Behaves like standard dict.get, but uses value in _default attribute if no default parameter specified. >>> dd = DefDict({'a': 10}, 0) >>> dd.get('a'), dd.get('b'), dd.get('b', 11) (10, 0, 11) >>> print dd {'a': 10} """ if not args: args = (self._default,) return dict.get(self, key, *args) def copy(self): """Return shallow copy of dictionary. >>> dd = DefDict.fromkeys(range(5), 5) >>> ddcopy = dd.copy() >>> print ddcopy._default, isinstance(ddcopy, DefDict); print ddcopy 5 True {0: 5, 1: 5, 2: 5, 3: 5, 4: 5} >>> ddcopy[0] = 7 >>> print dd[0], ddcopy[0] 5 7 """ return self.__class__(self, self._default) __copy__ = copy def __str__(self): """Convert self to string with keys in sorted order. >>> str(DefDict()) '{}' >>> str(DefDict({9: 0, 'test': 0, 'a': 0, 0: 0})) "{0: 0, 9: 0, 'a': 0, 'test': 0}" """ if not self: return '{}' #nothing to sort keys = self.keys() keys.sort() return '{' + ', '.join(["%r: %s" % (k, self[k]) for k in keys]) + '}' if __name__ == "__main__": import doctest print doctest.testmod()