Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python             
# This file is licensed under the GNU General Public License found at <http://www.gnu.org/licenses>
# email:  Mark Janssen <dreamingforward@gmail.com>

"""Dictionary with default values, collision function, and sorted string output."""

import exceptions
import copy

class KeyAlreadyExists(exceptions.LookupError): pass
class _use_default_:
   
"""Dummy class used as value in function parameters to indicate
    no default value specified; i.e. use the value in DefDict._default.
    Created special class to avoid clashing with possible value passed
    by user."""

#XXX perhaps should have instantiate default which takes arbitrary
# number of parameters that will be passed to value stored in DefDict._default
# to allow object creation.  DefDict.add would check if isinstance(default, _use_default_)

#define some useful collision functions
#May wish to return an expression instead of setting ddict directly
#  to allow lambda functions to be passed as collision functions.
#  may sacrifice speed for cases when value modified in place or when old value lookup not needed.
_OVERWRITE_
= None   #i.e. will use standard dict overwrite semantics
def _RETAIN_(ddict, key, new_value): pass  #do nothing to the value
def _RAISE_(ddict, key, new_value): raise KeyAlreadyExists, repr(key)
def _ADD_(ddict, key, new_value): ddict[key] += new_value
def _MAX_(ddict, key, new_value): ddict[key] = max(ddict[key], new_value)
def _MIN_(ddict, key, new_value): ddict[key] = min(ddict[key], new_value)
def _OUTPUT_KEY_(ddict, key, new_value): print key   #should probably send to stderr


class DefDict(dict):
   
"""Extends standard dictionary type by allowing user to
    specify a default value when a key is inserted.
    A 'collision' function can be provided to specify what should
    be done to the value when a key is added that already exists.

    User-defined collision function should take
    (defdict, key, new_value) as parameters.
    """

   
#XXX may wish to have collision method instead of constantly passing as parameter

    __slots__
= ['_default']

   
def __init__(self, init={}, default=None, collision=_OVERWRITE_):
       
"""Create dictionary and initialize with mapping or list of (key, value) pairs, if given.
        Sets a default value for the keys when no other specified.

        Initialization interface similar to standard dictionary:
        >>> dd = DefDict()  #create empty DefDict
        >>> print dd
        {}
        >>> print DefDict({1: 1, 2: 4, 3: 9}) #initialize with dict type
        {1: 1, 2: 4, 3: 9}

        Initialize with list of (key, value) pairs:
        >>> print DefDict([('a', 2), ('b', 3), ('c', 1), ('a', 5)], collision=_ADD_)
        {'a': 7, 'b': 3, 'c': 1}

        A sequence of (key, value) pairs may contain duplicate keys, like above.
        The resulting value of such "
key collisions" can be specified
        by providing a collision function.  The collision function will
        be called with parameters (self, key, new_value) and should set the
        value of self[key] as desired.

        This module defines a few useful collision functions:

        _OVERWRITE_:  the default--standard dictionary semantics;
            i.e. if key already exists, new value overwrites existing
            key value.
        _RETAIN_:  value remains unchanged if key already exists.
        _RAISE_EXCEPTION_:  raise 'KeyAlreadyExists' exception if key already
            exists.  Value remains unchanged.
        _ADD_:  sets value to existing value + new value
        _MAX_:  sets to greater of old, new values
        _MIN_:  sets to lesser of old, new values
        """


       
self._default = default
        dict
.__init__(self)
       
if isinstance(init, dict): #don't use dict(init) since derives classes have special setitem behavior
            init
= init.iteritems()
       
#list of (key, value) pairs may contain duplicates
       
for key, value in init:
           
self.setdefault(key, value, collision)

   
def fromkeys(cls, iterable, default=None, collision=_OVERWRITE_):
       
"""Create dictionary from iterable with optional default value.

        One can initialize defdict with a sequence of keys.
        The dictionary values of the keys will be determined by the default value,
        if given, otherwise defaults to None.

        >>> print DefDict.fromkeys(["
apple", "banana", "carrot"])
        {'apple': None, 'banana': None, 'carrot': None}
        >>> dd = DefDict.fromkeys(range(4), 0)   #initialize with values = 0
        >>> print dd
        {0: 0, 1: 0, 2: 0, 3: 0}
        >>> dd.update([0])  #default value retained?
        >>> print dd[0]     #Yes if output is 1
        1
        >>> print DefDict.fromkeys("
abacab", 1, _ADD_)
        {'a': 3, 'b': 2, 'c': 1}
        """

        dd
= cls(default=default)
       
for key in iterable:
             dd
.setdefault(key, default, collision)
       
return dd

    fromkeys
= classmethod(fromkeys)

   
def update(self, other, collision=_OVERWRITE_):
       
"""Updates defdict from mapping type or iterable with optional collision function.

        >>> d = DefDict.fromkeys('ab', 1)
        >>> d.update({'a': 0, 'c': 2})
        >>> print d
        {'a': 0, 'b': 1, 'c': 2}

        As seen above, when updating a key which already exists and no
        collision function is specified, update defaults to standard dictionary
        OVERWRITE semantics.  This behavior can be modified by passing a
        collision function.

        >>> d.update({'b': 3, 'd': 9}, _ADD_)
        >>> print d
        {'a': 0, 'b': 4, 'c': 2, 'd': 9}

        >>> d._default = 5          #manually change default value
        >>> d.update(['b','c'])     #update from non-dict
        >>> d._default = 1          #set back to original
        >>> d.update('abd')         #string are iterated
        >>> print d
        {'a': 1, 'b': 1, 'c': 5, 'd': 1}
        >>> d.update('de', _ADD_)
        >>> print d
        {'a': 1, 'b': 1, 'c': 5, 'd': 2, 'e': 1}

        NOTE:  If collision function raises an exception, DefDict may be
        left in partially-updated state.
        """

       
#perhaps should catch any exceptions that may be caused by collision
       
#  and store aberrent keys in the exception to be reported later.
       
if isinstance(other, dict):
           
for key, value in other.iteritems():
               
self.setdefault(key, value, collision)
       
else:  #given iterable
           
for key in other:
               
self.setdefault(key, self._default, collision)

   
def setdefault(self, key, value = _use_default_, collision=_RETAIN_):
       
"""Behaves like standard dict.setdefault, but uses value in _default attribute
        if no default parameter specified.

        >>> dd = DefDict(default=5)
        >>> dd.setdefault('a', 10), dd.setdefault('b'), dd.setdefault('b', 11)
        (10, 5, 5)
        >>> print dd
        {'a': 10, 'b': 5}

        A collision function can also be passed to override setdefault's
        standard RETAIN semantics.

        >>> dd.setdefault('a', collision=_OVERWRITE_), dd.setdefault('b', 6, _ADD_)
        (5, 11)
        >>> dd.setdefault('b', 12, _MAX_), dd.setdefault('b', 10, _MAX_)
        (12, 12)
        >>> dd.setdefault('c', 10, _RAISE_)
        10
        >>> dd.setdefault('c', 10, _RAISE_)
        Traceback (most recent call last):
        KeyAlreadyExists: 'c'
        >>> print dd
        {'a': 5, 'b': 12, 'c': 10}

        Default value is NOT copied if non-simple type (ex. list, dict).
        If values must be distinct objects, then you must subclass and
        override this method or __setitem__() to create a copy of the default.

        >>> dd = DefDict(default=[])
        >>> dd.setdefault(1), dd.setdefault(2)
        ([], [])
        >>> dd[1] is dd[2]  #keys 1 and 2 do have distinct list objects
        True
        >>> dd[2].append(42) #will also change value in dd[1]!!!
        >>> print dd
        {1: [42], 2: [42]}
        """

        key_absent
= key not in self   #fail immediately if key unhashable
       
if value == _use_default_: value = self._default #XXX should make copy for non-simple default, or rely on __setitem__() to make copy?
       
if collision == _OVERWRITE_ or key_absent:
           
self[key] = value #note: subclass may override setitem method so value may be modified
       
else:
            collision
(self, key, value)
       
return dict.__getitem__(self, key)  #may wish to allow dd[key] to insert key in dd with default value

   
def get(self, key, *args):
       
"""Behaves like standard dict.get, but uses value in _default attribute
        if no default parameter specified.

        >>> dd = DefDict({'a': 10}, 0)
        >>> dd.get('a'), dd.get('b'), dd.get('b', 11)
        (10, 0, 11)
        >>> print dd
        {'a': 10}
        """

       
if not args: args = (self._default,)
       
return dict.get(self, key, *args)

   
def copy(self):
       
"""Return shallow copy of dictionary.

        >>> dd = DefDict.fromkeys(range(5), 5)
        >>> ddcopy = dd.copy()
        >>> print ddcopy._default, isinstance(ddcopy, DefDict); print ddcopy
        5 True
        {0: 5, 1: 5, 2: 5, 3: 5, 4: 5}
        >>> ddcopy[0] = 7
        >>> print dd[0], ddcopy[0]
        5 7
        """

       
return self.__class__(self, self._default)

    __copy__
= copy

   
def __str__(self):
       
"""Convert self to string with keys in sorted order.

        >>> str(DefDict())
        '{}'
        >>> str(DefDict({9: 0, 'test': 0, 'a': 0, 0: 0}))
        "
{0: 0, 9: 0, 'a': 0, 'test': 0}"
        """

       
if not self: return '{}'    #nothing to sort
        keys
= self.keys()
        keys
.sort()
       
return '{' + ', '.join(["%r: %s" % (k, self[k]) for k in keys]) + '}'


if __name__ == "__main__":
   
import doctest
   
print doctest.testmod()

History