Welcome, guest | Sign In | My Account | Store | Cart

A bag: a set-like container that simply counts the number of same items held within it.

Python, 429 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
#!/usr/bin/env python
# licensed under the CreativeCommons, Share-Alike, Attribution (CC-BY-SA from creativecommons.com)
# email: dreamingforward@gmail.com
 
"""Bag types:  a set-like container that counts the number of same items rather than consuming memory for storage."""

import random  #pick()

_DEBUG = True

class IntegerBag(dict):
    """Implements a bag type that allows item counts to be negative."""

    __slots__ = []

    def __init__(self, init={}):
        """Initialize bag with optional contents.

        >>> b = Bag()   #creates empty bag
        >>> b
        {}
        >>> print(IntegerBag({1: -1, 2: 0, 3: -9}))
        {1: -1, 3: -9}

        Can initialize with (key, count) list as in standard dict.
        However, duplicate keys will accumulate counts:
        >>> print(Bag([(1, 2), (2, 4), (1, 7)]))
        {1: 9, 2: 4}
        """
        if not init or isinstance(init, self.__class__):
            dict.__init__(self, init)   #values known to be good, use faster dict creation
        else:   #initializing with list or plain dict
            dict.__init__(self)
            if isinstance(init, dict):
                for key, count in init.items():
                    self[key] = count #will test invariants
            else:       #sequence may contain duplicates, so add to existing value, if any
                for key, count in init:
                    self[key] += count

    def fromkeys(cls, iterable, count=1):
        """Class method which creates bag from iterable adding optional count for each item.

        >>> b = Bag({'b': 2, 'c': 1, 'a': 3})
        >>> b2 = Bag.fromkeys(['a', 'b', 'c', 'b', 'a', 'a'])
        >>> b3 = Bag.fromkeys("abacab")
        >>> assert b == b2 == b3

        >>> word_count = Bag.fromkeys("how much wood could a wood chuck chuck".split())
        >>> print(word_count)
        {'a': 1, 'chuck': 2, 'could': 1, 'how': 1, 'much': 1, 'wood': 2}

        An optional count can be specified.  Count added each time item is encountered.
        >>> print(Bag.fromkeys("abacab", 5))
        {'a': 15, 'b': 10, 'c': 5}
        """
        b = cls()
        for key in iterable: #could just return b.__iadd__(iterable, count)
            b[key] += count  #perhaps slower than necessary but will simplify derived classes that override __setitem__()
        return b

    fromkeys = classmethod(fromkeys)

    def update(self, items, count=1):
        """Adds contents to bag from other mapping type or iterable.

        >>> ib = IntegerBag.fromkeys('abc')
        >>> ib.update({'a': 2, 'b': 1, 'c': 0})
        >>> print(ib)
        {'a': 3, 'b': 2, 'c': 1}

        Negative updates are allowable.
        >>> ib.update({'a': -2, 'b': -2, 'c': -2, 'd': 2})
        >>> print(ib)
        {'a': 1, 'c': -1, 'd': 2}

        Can call with iterable.  Amount added can be specified by count parameter:
        >>> ib.update(['a','b'], 2)
        >>> print(ib)
        {'a': 3, 'b': 2, 'c': -1, 'd': 2}

        Values that can't be converted to ints are skipped and will raise TypeError.
        >>> ib.update({0: 'test1', 'a': 'test2', 'd': 3, 'f': '1.0', 'c': 2.0})
        Traceback (most recent call last):
        TypeError: unsupported operand type(s) for +=: 'int' and 'str'
        >>> print(ib)
        {'a': 3, 'b': 2, 'c': 1, 'd': 5}

        Updating Bag with values that would cause the count to go negative
        sets count to 0, removing item.
        >>> b = Bag({'a': 1, 'c': 2, 'd': 5})
        >>> b.update({'a': -4, 'b': -1, 'c': -2, 'd': -2})
        >>> print(b)
        {'d': 3}

        NOTE:  Exceptions are only reported on the last bad element encountered.
        """
        #XXX may be able to improve this by calling dict methods directly and/or using map and operator functions
        #XXX should raise exception and return unchanged self if problem encountered!
        #XXX or use logging.warning() and continue
        err = False
        if isinstance(items, dict):
            for key, count in items.items():
                try:
                    self[key] += count  #may be slower than necessary
                except TypeError as error: err = True #FIXME should have to re-assign to propagate error:  check docs
        else: #sequence
            for key in items:
                try:
                    self[key] += count
                except TypeError as error: err = Trie
        if err: raise TypeError(error)

    def pick(self, count=1, remove=True): #XXX perhaps better to default to False?
        """Returns a bag with 'count' random items from bag (defaults to 1), removing the items unless told otherwise.

        >>> b = IntegerBag({'a': 3, 'b': -2, 'c': 1})
        >>> sub = b.pick(4)
        >>> sub.size, b.size
        (4, 2)
        """
        l = list(self.itereach())
        picked = IntegerBag(random.sample(l, min(abs(count), len(l))))
        if count < 0:  picked *= (-1)  #this probably not useful except for Network class
        if remove: self -= picked
        return picked

    def pop(self, item):
        """Remove all of item from bag, returning its count, if any.

        >>> b = IntegerBag.fromkeys("abacab")
        >>> b.pop('b')
        2
        >>> b.pop('z')
        0
        >>> print(b)
        {'a': 3, 'c': 1}
        """
        return super(IntegerBag, self).pop(item, 0)

    def discard(self, item):
        """Removes all of the specified item if it exists, otherwise ignored.

        >>> b = Bag.fromkeys("abacab")
        >>> b.discard('b')
        >>> b.discard('d')  #non-existent items ignored
        >>> print(b)
        {'a': 3, 'c': 1}
        """
        try: del self[item] #note: this does not call __getitem__
        except KeyError: pass

    def setdefault(self, item, count=1):
        count = self._filter(count)
        return count and dict.setdefault(self, item, count)

    def itereach(self):  #XXX consider rename akin to Python3 rules
        """Will iterate through all items in bag individually.

        >>> b = Bag.fromkeys("abacab")
        >>> l = list(b.itereach()); l.sort()
        >>> l
        [('a', 1), ('a', 1), ('a', 1), ('b', 1), ('b', 1), ('c', 1)]
        >>> b = IntegerBag(b)
        >>> b['b'] = -2
        >>> l = list(b.itereach()); l.sort()
        >>> l
        [('a', 1), ('a', 1), ('a', 1), ('b', -1), ('b', -1), ('c', 1)]

        Note: iteration on bag itself just iterates through unique keys:
        >>> l = list(b) ; l.sort()
        >>> l
        ['a', 'b', 'c']
        """
        for key, count in self.items():
            for i in range(abs(count)):
                yield (key, count >= 0 and 1 or -1) #consider returning (key, +/-1) pair to account for negative counts

    def __iadd__(self, other):
        """Add items in bag.

        >>> b = Bag()
        >>> b += [1, 2, 1, 0]
        >>> print(b)
        {0: 1, 1: 2, 2: 1}
        >>> b.clear()
        >>> b += "abca"
        >>> print(b)
        {'a': 2, 'b': 1, 'c': 1}
        """
        self.update(other, 1)  #XXX may fail mid-update...
        return self

    def __add__(self, other):  #XXX better way to create copy?? (in case self.__class__ has more complicated constructor...)
        """Add one bag to another, returns type of first bag.

        >>> b = IntegerBag({1: 2, 2: -2}) + Bag({1: 5, 2: 1, 3: 7})
        >>> b, "IntegerBag" in str(type(b))
        ({1: 7, 2: -1, 3: 7}, True)
        """
        return self.__class__(self).__iadd__(other)

    def __isub__(self, other):
        """Subtract items from bag.

        >>> b = Bag.fromkeys("abacab")
        >>> b -= "cccccab"
        >>> print(b)
        {'a': 2, 'b': 1}
        """
        if isinstance(other, dict):
            other = IntegerBag(other) * (-1)
        self.update(other, -1)
        return self

    def __sub__(self, other):
        """Subtract items from bag.

        >>> IntegerBag({1: 2, 2: -2}) - {1: 5, 2: -2, 3: 7}
        {1: -3, 3: -7}
        """
        return self.__class__(self).__isub__(other)

    def __imul__(self, factor):
        """Multiply bag contents by factor.

        >>> b = Bag.fromkeys("abacab")
        >>> b *= 4
        >>> print(b)
        {'a': 12, 'b': 8, 'c': 4}

        Negative factors can be used with IntegerBag.
        >>> ib = IntegerBag(b)
        >>> ib *= -1
        >>> print(ib)
        {'a': -12, 'b': -8, 'c': -4}

        Trying that on a Bag will return empty bag (akin to list behavior).
        >>> b *= -1
        >>> b
        {}

        Zero factors will return empty bag.
        >>> b += "abacab"
        >>> b *= 0
        >>> b
        {}

        """
        if self._filter(factor):
            for item, count in self.items():
                dict.__setitem__(self, item, count*factor) #bypass test logic in bag.__setitem__
        else:   #factor==0 or negative on Bag
            dict.clear(self)    #call dict.clear to protect subclass which might override and do other things besides clear dict values
        return self

    def __mul__(self, factor):
        """Returns new bag of same type multiplied by factor.

        >>> d = {1: 2, 2: 4, 3: -9}
        >>> IntegerBag(d) * -1
        {1: -2, 2: -4, 3: 9}
        >>> Bag(d) * -1
        {}
        """
        #XXX should perhaps use explicit IntBag in case derived class needs parameters -- or use copy()???
        return self.__class__(self).__imul__(factor)

    def _size(self):
        """Returns sum of absolute value of item counts in bag.

        >>> b = IntegerBag.fromkeys("abacab")
        >>> b['a'] = -4
        >>> b.size
        7
        """
        return sum(map(abs, self.values()))

    size = property(_size, None, None, "Sum of absolute count values in the bag")

    def __getitem__(self, item):
        """Returns total count for given item, or zero if item not in bag.

        >>> b = Bag.fromkeys("abacab")
        >>> b['a']
        3
        >>> b['d']
        0
        """
        return self.get(item, 0)

    count = __getitem__

    def __setitem__(self, item, count):
        """Sets the count for the given item in bag, removing if zero.

        >>> b = Bag()
        >>> b[1] = 3
        >>> b[3] = 1.6  #floats get coerced to ints
        >>> b[4] = "2"  #as do int strings
        >>> print(b)
        {1: 3, 3: 1, 4: 2}

        If count is zero, all 'matching items' are deleted from bag.
        >>> b[2] = 0
        >>> print(b)
        {1: 3, 3: 1, 4: 2}

        Counts for IntegerBag are allowed to be negative.
        >>> ib = IntegerBag(b)
        >>> ib[4] = -2
        >>> ib[5] -= 2
        >>> ib[1] -= 4
        >>> ib[3] -= 1
        >>> print(ib)
        {1: -1, 4: -2, 5: -2}

        Trying to set negative values on Bag reverts to zero.
        >>> b[4] = -2
        >>> b[4]
        0

        If count is non-integer, an exception is raised.
        >>> b[1] = "oops"  #doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValueError: invalid literal for int(): oops
        """
        count = self._filter(count)
        if count:
            dict.__setitem__(self, item, count)  #XXX should this call super instead of dict (for cases of multiple inheritence etc...)
        else:   #setting to 0 so discard key
            self.discard(item)

    def __str__(self):
        """Convert self to string with items in sorted order.

        >>> str(IntegerBag())
        '{}'
        >>> str(IntegerBag({'b': -2, 'a': 3, 'c': 1, 1: 0}))
        "{'a': 3, 'b': -2, 'c': 1}"
        >>> str(Bag.fromkeys("abacab"))
        "{'a': 3, 'b': 2, 'c': 1}"
        """
        #sort by values, largest first? should we sort at all?
        if _DEBUG: self._validate()
        if not self: return '{}'    #nothing to sort
        keys = sorted(self) #this extra assigment necessary???  !Must remember basic python!...
        return '{%s}' % ', '.join(["%r: %r" % (k, self[k]) for k in keys])

    def _filter(value): #XXX could just set _filter = int but doctest complains even under Python 2.3.3
        """Coerces value to int and returns it, or raise raises TypeError."""
        return int(value)

    _filter = staticmethod(_filter)

    def _validate(self):
        """Check class invariants.

        >>> b = IntegerBag.fromkeys("abc")
        >>> dict.__setitem__(b, 'a', "oops")
        >>> b._validate() #doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ValueError: invalid literal for int(): oops
        >>> b = Bag()
        >>> dict.__setitem__(b, 'a', 0)    #zero values are normally deleted
        >>> b
        {'a': 0}
        >>> b._validate()
        Traceback (most recent call last):
        AssertionError: zero value encountered
        >>> b = Bag()
        >>> dict.__setitem__(b, 'a', -1)   #negative values not allowed
        >>> b._validate()
        Traceback (most recent call last):
        AssertionError: unfiltered value
        """
        for count in self.values():
            assert count == self._filter(count), "unfiltered value"
            assert count, "zero value encountered"


class Bag(IntegerBag):
    """Standard bag class.  Allows only non-negative bag counts."""

    __slots__ = []

    def _size(self):
        """Returns total number of items in bag.

        >>> b = Bag.fromkeys("abacab")
        >>> b.size
        6
        """
        return sum(self.values())

    size = property(_size, None, None, "Sum of all bag values.")

    def _filter(value):
        """Returns 0 if value is negative. """
        return max(int(value), 0)

    _filter = staticmethod(_filter)


def _test():
    """Miscillaneous tests:

    Equality test.  Can compare against dictionary or bag types.
    >>> Bag.fromkeys("abacab") == {'a': 3, 'b': 2, 'c': 1}
    True
    >>> b, l = Bag.fromkeys([1, 2, 1, 3, 1]), [1, 1, 1, 3, 2]
    >>> b == l
    False
    >>> b == Bag.fromkeys(l) == IntegerBag.fromkeys(l)
    True

    Tests for non-zero:
    >>> b = Bag()
    >>> bool(b)
    False
    >>> b += [0]
    >>> bool(b)
    True
    """
    pass

if __name__ == "__main__":
    import doctest
    return doctest.testmod()
Created by user on Wed, 17 Jul 2013 (MIT)
Python recipes (4591)
user's recipes (4)

Required Modules

Other Information and Tasks