Welcome, guest | Sign In | My Account | Store | Cart

default dictionary with collision function to handle key collisions.

Python, 243 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#!/usr/bin/env python             
# This file is licensed under the GNU General Public License found at <http://www.gnu.org/licenses>
# email:  Mark Janssen <dreamingforward@gmail.com>

"""Dictionary with default values, collision function, and sorted string output."""

import exceptions
import copy

class KeyAlreadyExists(exceptions.LookupError): pass
class _use_default_:
    """Dummy class used as value in function parameters to indicate
    no default value specified; i.e. use the value in DefDict._default.
    Created special class to avoid clashing with possible value passed
    by user."""
#XXX perhaps should have instantiate default which takes arbitrary
# number of parameters that will be passed to value stored in DefDict._default
# to allow object creation.  DefDict.add would check if isinstance(default, _use_default_)

#define some useful collision functions
#May wish to return an expression instead of setting ddict directly
#  to allow lambda functions to be passed as collision functions.
#  may sacrifice speed for cases when value modified in place or when old value lookup not needed.
_OVERWRITE_ = None   #i.e. will use standard dict overwrite semantics
def _RETAIN_(ddict, key, new_value): pass  #do nothing to the value
def _RAISE_(ddict, key, new_value): raise KeyAlreadyExists, repr(key)
def _ADD_(ddict, key, new_value): ddict[key] += new_value
def _MAX_(ddict, key, new_value): ddict[key] = max(ddict[key], new_value)
def _MIN_(ddict, key, new_value): ddict[key] = min(ddict[key], new_value)
def _OUTPUT_KEY_(ddict, key, new_value): print key   #should probably send to stderr


class DefDict(dict):
    """Extends standard dictionary type by allowing user to
    specify a default value when a key is inserted.
    A 'collision' function can be provided to specify what should
    be done to the value when a key is added that already exists.

    User-defined collision function should take
    (defdict, key, new_value) as parameters.
    """
    #XXX may wish to have collision method instead of constantly passing as parameter

    __slots__ = ['_default']

    def __init__(self, init={}, default=None, collision=_OVERWRITE_):
        """Create dictionary and initialize with mapping or list of (key, value) pairs, if given.
        Sets a default value for the keys when no other specified.

        Initialization interface similar to standard dictionary:
        >>> dd = DefDict()  #create empty DefDict
        >>> print dd
        {}
        >>> print DefDict({1: 1, 2: 4, 3: 9}) #initialize with dict type
        {1: 1, 2: 4, 3: 9}

        Initialize with list of (key, value) pairs:
        >>> print DefDict([('a', 2), ('b', 3), ('c', 1), ('a', 5)], collision=_ADD_)
        {'a': 7, 'b': 3, 'c': 1}

        A sequence of (key, value) pairs may contain duplicate keys, like above.
        The resulting value of such "key collisions" can be specified
        by providing a collision function.  The collision function will
        be called with parameters (self, key, new_value) and should set the
        value of self[key] as desired.

        This module defines a few useful collision functions:

        _OVERWRITE_:  the default--standard dictionary semantics;
            i.e. if key already exists, new value overwrites existing
            key value.
        _RETAIN_:  value remains unchanged if key already exists.
        _RAISE_EXCEPTION_:  raise 'KeyAlreadyExists' exception if key already
            exists.  Value remains unchanged.
        _ADD_:  sets value to existing value + new value
        _MAX_:  sets to greater of old, new values
        _MIN_:  sets to lesser of old, new values
        """

        self._default = default
        dict.__init__(self)
        if isinstance(init, dict): #don't use dict(init) since derives classes have special setitem behavior
            init = init.iteritems()
        #list of (key, value) pairs may contain duplicates
        for key, value in init:
            self.setdefault(key, value, collision)

    def fromkeys(cls, iterable, default=None, collision=_OVERWRITE_):
        """Create dictionary from iterable with optional default value.

        One can initialize defdict with a sequence of keys.
        The dictionary values of the keys will be determined by the default value,
        if given, otherwise defaults to None.

        >>> print DefDict.fromkeys(["apple", "banana", "carrot"])
        {'apple': None, 'banana': None, 'carrot': None}
        >>> dd = DefDict.fromkeys(range(4), 0)   #initialize with values = 0
        >>> print dd
        {0: 0, 1: 0, 2: 0, 3: 0}
        >>> dd.update([0])  #default value retained?
        >>> print dd[0]     #Yes if output is 1
        1
        >>> print DefDict.fromkeys("abacab", 1, _ADD_)
        {'a': 3, 'b': 2, 'c': 1}
        """
        dd = cls(default=default)
        for key in iterable:
             dd.setdefault(key, default, collision)
        return dd

    fromkeys = classmethod(fromkeys)

    def update(self, other, collision=_OVERWRITE_):
        """Updates defdict from mapping type or iterable with optional collision function.

        >>> d = DefDict.fromkeys('ab', 1)
        >>> d.update({'a': 0, 'c': 2})
        >>> print d
        {'a': 0, 'b': 1, 'c': 2}

        As seen above, when updating a key which already exists and no
        collision function is specified, update defaults to standard dictionary
        OVERWRITE semantics.  This behavior can be modified by passing a
        collision function.

        >>> d.update({'b': 3, 'd': 9}, _ADD_)
        >>> print d
        {'a': 0, 'b': 4, 'c': 2, 'd': 9}

        >>> d._default = 5          #manually change default value
        >>> d.update(['b','c'])     #update from non-dict
        >>> d._default = 1          #set back to original
        >>> d.update('abd')         #string are iterated
        >>> print d
        {'a': 1, 'b': 1, 'c': 5, 'd': 1}
        >>> d.update('de', _ADD_)
        >>> print d
        {'a': 1, 'b': 1, 'c': 5, 'd': 2, 'e': 1}

        NOTE:  If collision function raises an exception, DefDict may be
        left in partially-updated state.
        """
        #perhaps should catch any exceptions that may be caused by collision
        #  and store aberrent keys in the exception to be reported later.
        if isinstance(other, dict):
            for key, value in other.iteritems():
                self.setdefault(key, value, collision)
        else:  #given iterable
            for key in other: 
                self.setdefault(key, self._default, collision)

    def setdefault(self, key, value = _use_default_, collision=_RETAIN_):
        """Behaves like standard dict.setdefault, but uses value in _default attribute 
        if no default parameter specified.

        >>> dd = DefDict(default=5)
        >>> dd.setdefault('a', 10), dd.setdefault('b'), dd.setdefault('b', 11)
        (10, 5, 5)
        >>> print dd
        {'a': 10, 'b': 5}

        A collision function can also be passed to override setdefault's
        standard RETAIN semantics.

        >>> dd.setdefault('a', collision=_OVERWRITE_), dd.setdefault('b', 6, _ADD_)
        (5, 11)
        >>> dd.setdefault('b', 12, _MAX_), dd.setdefault('b', 10, _MAX_)
        (12, 12)
        >>> dd.setdefault('c', 10, _RAISE_)
        10
        >>> dd.setdefault('c', 10, _RAISE_)
        Traceback (most recent call last):
        KeyAlreadyExists: 'c'
        >>> print dd
        {'a': 5, 'b': 12, 'c': 10}

        Default value is NOT copied if non-simple type (ex. list, dict).
        If values must be distinct objects, then you must subclass and
        override this method or __setitem__() to create a copy of the default.

        >>> dd = DefDict(default=[])
        >>> dd.setdefault(1), dd.setdefault(2)
        ([], [])
        >>> dd[1] is dd[2]  #keys 1 and 2 do have distinct list objects
        True
        >>> dd[2].append(42) #will also change value in dd[1]!!!
        >>> print dd
        {1: [42], 2: [42]}
        """
        key_absent = key not in self   #fail immediately if key unhashable
        if value == _use_default_: value = self._default #XXX should make copy for non-simple default, or rely on __setitem__() to make copy?
        if collision == _OVERWRITE_ or key_absent:
            self[key] = value #note: subclass may override setitem method so value may be modified
        else:
            collision(self, key, value)
        return dict.__getitem__(self, key)  #may wish to allow dd[key] to insert key in dd with default value

    def get(self, key, *args):
        """Behaves like standard dict.get, but uses value in _default attribute 
        if no default parameter specified.

        >>> dd = DefDict({'a': 10}, 0)
        >>> dd.get('a'), dd.get('b'), dd.get('b', 11)
        (10, 0, 11)
        >>> print dd
        {'a': 10}
        """
        if not args: args = (self._default,)
        return dict.get(self, key, *args)

    def copy(self):
        """Return shallow copy of dictionary.

        >>> dd = DefDict.fromkeys(range(5), 5)
        >>> ddcopy = dd.copy()
        >>> print ddcopy._default, isinstance(ddcopy, DefDict); print ddcopy
        5 True
        {0: 5, 1: 5, 2: 5, 3: 5, 4: 5}
        >>> ddcopy[0] = 7
        >>> print dd[0], ddcopy[0]
        5 7
        """
        return self.__class__(self, self._default)

    __copy__ = copy

    def __str__(self):
        """Convert self to string with keys in sorted order.

        >>> str(DefDict())
        '{}'
        >>> str(DefDict({9: 0, 'test': 0, 'a': 0, 0: 0}))
        "{0: 0, 9: 0, 'a': 0, 'test': 0}"
        """
        if not self: return '{}'    #nothing to sort
        keys = self.keys()
        keys.sort()
        return '{' + ', '.join(["%r: %s" % (k, self[k]) for k in keys]) + '}'


if __name__ == "__main__":
    import doctest
    print doctest.testmod()

This is a great solution for making sophisticated data structures that manipulate values of dictionaries in some way. Very easy to make a Counter class for example, or a Graph abstract class.

Created by user on Thu, 15 Oct 2015 (GPL)
Python recipes (4591)
user's recipes (4)

Required Modules

  • (none specified)

Other Information and Tasks