#!/usr/bin/env python
# licensed under the CreativeCommons, Share-Alike, Attribution (CC-BY-SA from creativecommons.com)
# email: dreamingforward@gmail.com
"""Bag types: a set-like container that counts the number of same items rather than consuming memory for storage."""
import random #pick()
_DEBUG = True
class IntegerBag(dict):
"""Implements a bag type that allows item counts to be negative."""
__slots__ = []
def __init__(self, init={}):
"""Initialize bag with optional contents.
>>> b = Bag() #creates empty bag
>>> b
{}
>>> print(IntegerBag({1: -1, 2: 0, 3: -9}))
{1: -1, 3: -9}
Can initialize with (key, count) list as in standard dict.
However, duplicate keys will accumulate counts:
>>> print(Bag([(1, 2), (2, 4), (1, 7)]))
{1: 9, 2: 4}
"""
if not init or isinstance(init, self.__class__):
dict.__init__(self, init) #values known to be good, use faster dict creation
else: #initializing with list or plain dict
dict.__init__(self)
if isinstance(init, dict):
for key, count in init.items():
self[key] = count #will test invariants
else: #sequence may contain duplicates, so add to existing value, if any
for key, count in init:
self[key] += count
def fromkeys(cls, iterable, count=1):
"""Class method which creates bag from iterable adding optional count for each item.
>>> b = Bag({'b': 2, 'c': 1, 'a': 3})
>>> b2 = Bag.fromkeys(['a', 'b', 'c', 'b', 'a', 'a'])
>>> b3 = Bag.fromkeys("abacab")
>>> assert b == b2 == b3
>>> word_count = Bag.fromkeys("how much wood could a wood chuck chuck".split())
>>> print(word_count)
{'a': 1, 'chuck': 2, 'could': 1, 'how': 1, 'much': 1, 'wood': 2}
An optional count can be specified. Count added each time item is encountered.
>>> print(Bag.fromkeys("abacab", 5))
{'a': 15, 'b': 10, 'c': 5}
"""
b = cls()
for key in iterable: #could just return b.__iadd__(iterable, count)
b[key] += count #perhaps slower than necessary but will simplify derived classes that override __setitem__()
return b
fromkeys = classmethod(fromkeys)
def update(self, items, count=1):
"""Adds contents to bag from other mapping type or iterable.
>>> ib = IntegerBag.fromkeys('abc')
>>> ib.update({'a': 2, 'b': 1, 'c': 0})
>>> print(ib)
{'a': 3, 'b': 2, 'c': 1}
Negative updates are allowable.
>>> ib.update({'a': -2, 'b': -2, 'c': -2, 'd': 2})
>>> print(ib)
{'a': 1, 'c': -1, 'd': 2}
Can call with iterable. Amount added can be specified by count parameter:
>>> ib.update(['a','b'], 2)
>>> print(ib)
{'a': 3, 'b': 2, 'c': -1, 'd': 2}
Values that can't be converted to ints are skipped and will raise TypeError.
>>> ib.update({0: 'test1', 'a': 'test2', 'd': 3, 'f': '1.0', 'c': 2.0})
Traceback (most recent call last):
TypeError: unsupported operand type(s) for +=: 'int' and 'str'
>>> print(ib)
{'a': 3, 'b': 2, 'c': 1, 'd': 5}
Updating Bag with values that would cause the count to go negative
sets count to 0, removing item.
>>> b = Bag({'a': 1, 'c': 2, 'd': 5})
>>> b.update({'a': -4, 'b': -1, 'c': -2, 'd': -2})
>>> print(b)
{'d': 3}
NOTE: Exceptions are only reported on the last bad element encountered.
"""
#XXX may be able to improve this by calling dict methods directly and/or using map and operator functions
#XXX should raise exception and return unchanged self if problem encountered!
#XXX or use logging.warning() and continue
err = False
if isinstance(items, dict):
for key, count in items.items():
try:
self[key] += count #may be slower than necessary
except TypeError as error: err = True #FIXME should have to re-assign to propagate error: check docs
else: #sequence
for key in items:
try:
self[key] += count
except TypeError as error: err = Trie
if err: raise TypeError(error)
def pick(self, count=1, remove=True): #XXX perhaps better to default to False?
"""Returns a bag with 'count' random items from bag (defaults to 1), removing the items unless told otherwise.
>>> b = IntegerBag({'a': 3, 'b': -2, 'c': 1})
>>> sub = b.pick(4)
>>> sub.size, b.size
(4, 2)
"""
l = list(self.itereach())
picked = IntegerBag(random.sample(l, min(abs(count), len(l))))
if count < 0: picked *= (-1) #this probably not useful except for Network class
if remove: self -= picked
return picked
def pop(self, item):
"""Remove all of item from bag, returning its count, if any.
>>> b = IntegerBag.fromkeys("abacab")
>>> b.pop('b')
2
>>> b.pop('z')
0
>>> print(b)
{'a': 3, 'c': 1}
"""
return super(IntegerBag, self).pop(item, 0)
def discard(self, item):
"""Removes all of the specified item if it exists, otherwise ignored.
>>> b = Bag.fromkeys("abacab")
>>> b.discard('b')
>>> b.discard('d') #non-existent items ignored
>>> print(b)
{'a': 3, 'c': 1}
"""
try: del self[item] #note: this does not call __getitem__
except KeyError: pass
def setdefault(self, item, count=1):
count = self._filter(count)
return count and dict.setdefault(self, item, count)
def itereach(self): #XXX consider rename akin to Python3 rules
"""Will iterate through all items in bag individually.
>>> b = Bag.fromkeys("abacab")
>>> l = list(b.itereach()); l.sort()
>>> l
[('a', 1), ('a', 1), ('a', 1), ('b', 1), ('b', 1), ('c', 1)]
>>> b = IntegerBag(b)
>>> b['b'] = -2
>>> l = list(b.itereach()); l.sort()
>>> l
[('a', 1), ('a', 1), ('a', 1), ('b', -1), ('b', -1), ('c', 1)]
Note: iteration on bag itself just iterates through unique keys:
>>> l = list(b) ; l.sort()
>>> l
['a', 'b', 'c']
"""
for key, count in self.items():
for i in range(abs(count)):
yield (key, count >= 0 and 1 or -1) #consider returning (key, +/-1) pair to account for negative counts
def __iadd__(self, other):
"""Add items in bag.
>>> b = Bag()
>>> b += [1, 2, 1, 0]
>>> print(b)
{0: 1, 1: 2, 2: 1}
>>> b.clear()
>>> b += "abca"
>>> print(b)
{'a': 2, 'b': 1, 'c': 1}
"""
self.update(other, 1) #XXX may fail mid-update...
return self
def __add__(self, other): #XXX better way to create copy?? (in case self.__class__ has more complicated constructor...)
"""Add one bag to another, returns type of first bag.
>>> b = IntegerBag({1: 2, 2: -2}) + Bag({1: 5, 2: 1, 3: 7})
>>> b, "IntegerBag" in str(type(b))
({1: 7, 2: -1, 3: 7}, True)
"""
return self.__class__(self).__iadd__(other)
def __isub__(self, other):
"""Subtract items from bag.
>>> b = Bag.fromkeys("abacab")
>>> b -= "cccccab"
>>> print(b)
{'a': 2, 'b': 1}
"""
if isinstance(other, dict):
other = IntegerBag(other) * (-1)
self.update(other, -1)
return self
def __sub__(self, other):
"""Subtract items from bag.
>>> IntegerBag({1: 2, 2: -2}) - {1: 5, 2: -2, 3: 7}
{1: -3, 3: -7}
"""
return self.__class__(self).__isub__(other)
def __imul__(self, factor):
"""Multiply bag contents by factor.
>>> b = Bag.fromkeys("abacab")
>>> b *= 4
>>> print(b)
{'a': 12, 'b': 8, 'c': 4}
Negative factors can be used with IntegerBag.
>>> ib = IntegerBag(b)
>>> ib *= -1
>>> print(ib)
{'a': -12, 'b': -8, 'c': -4}
Trying that on a Bag will return empty bag (akin to list behavior).
>>> b *= -1
>>> b
{}
Zero factors will return empty bag.
>>> b += "abacab"
>>> b *= 0
>>> b
{}
"""
if self._filter(factor):
for item, count in self.items():
dict.__setitem__(self, item, count*factor) #bypass test logic in bag.__setitem__
else: #factor==0 or negative on Bag
dict.clear(self) #call dict.clear to protect subclass which might override and do other things besides clear dict values
return self
def __mul__(self, factor):
"""Returns new bag of same type multiplied by factor.
>>> d = {1: 2, 2: 4, 3: -9}
>>> IntegerBag(d) * -1
{1: -2, 2: -4, 3: 9}
>>> Bag(d) * -1
{}
"""
#XXX should perhaps use explicit IntBag in case derived class needs parameters -- or use copy()???
return self.__class__(self).__imul__(factor)
def _size(self):
"""Returns sum of absolute value of item counts in bag.
>>> b = IntegerBag.fromkeys("abacab")
>>> b['a'] = -4
>>> b.size
7
"""
return sum(map(abs, self.values()))
size = property(_size, None, None, "Sum of absolute count values in the bag")
def __getitem__(self, item):
"""Returns total count for given item, or zero if item not in bag.
>>> b = Bag.fromkeys("abacab")
>>> b['a']
3
>>> b['d']
0
"""
return self.get(item, 0)
count = __getitem__
def __setitem__(self, item, count):
"""Sets the count for the given item in bag, removing if zero.
>>> b = Bag()
>>> b[1] = 3
>>> b[3] = 1.6 #floats get coerced to ints
>>> b[4] = "2" #as do int strings
>>> print(b)
{1: 3, 3: 1, 4: 2}
If count is zero, all 'matching items' are deleted from bag.
>>> b[2] = 0
>>> print(b)
{1: 3, 3: 1, 4: 2}
Counts for IntegerBag are allowed to be negative.
>>> ib = IntegerBag(b)
>>> ib[4] = -2
>>> ib[5] -= 2
>>> ib[1] -= 4
>>> ib[3] -= 1
>>> print(ib)
{1: -1, 4: -2, 5: -2}
Trying to set negative values on Bag reverts to zero.
>>> b[4] = -2
>>> b[4]
0
If count is non-integer, an exception is raised.
>>> b[1] = "oops" #doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
ValueError: invalid literal for int(): oops
"""
count = self._filter(count)
if count:
dict.__setitem__(self, item, count) #XXX should this call super instead of dict (for cases of multiple inheritence etc...)
else: #setting to 0 so discard key
self.discard(item)
def __str__(self):
"""Convert self to string with items in sorted order.
>>> str(IntegerBag())
'{}'
>>> str(IntegerBag({'b': -2, 'a': 3, 'c': 1, 1: 0}))
"{'a': 3, 'b': -2, 'c': 1}"
>>> str(Bag.fromkeys("abacab"))
"{'a': 3, 'b': 2, 'c': 1}"
"""
#sort by values, largest first? should we sort at all?
if _DEBUG: self._validate()
if not self: return '{}' #nothing to sort
keys = sorted(self) #this extra assigment necessary??? !Must remember basic python!...
return '{%s}' % ', '.join(["%r: %r" % (k, self[k]) for k in keys])
def _filter(value): #XXX could just set _filter = int but doctest complains even under Python 2.3.3
"""Coerces value to int and returns it, or raise raises TypeError."""
return int(value)
_filter = staticmethod(_filter)
def _validate(self):
"""Check class invariants.
>>> b = IntegerBag.fromkeys("abc")
>>> dict.__setitem__(b, 'a', "oops")
>>> b._validate() #doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
ValueError: invalid literal for int(): oops
>>> b = Bag()
>>> dict.__setitem__(b, 'a', 0) #zero values are normally deleted
>>> b
{'a': 0}
>>> b._validate()
Traceback (most recent call last):
AssertionError: zero value encountered
>>> b = Bag()
>>> dict.__setitem__(b, 'a', -1) #negative values not allowed
>>> b._validate()
Traceback (most recent call last):
AssertionError: unfiltered value
"""
for count in self.values():
assert count == self._filter(count), "unfiltered value"
assert count, "zero value encountered"
class Bag(IntegerBag):
"""Standard bag class. Allows only non-negative bag counts."""
__slots__ = []
def _size(self):
"""Returns total number of items in bag.
>>> b = Bag.fromkeys("abacab")
>>> b.size
6
"""
return sum(self.values())
size = property(_size, None, None, "Sum of all bag values.")
def _filter(value):
"""Returns 0 if value is negative. """
return max(int(value), 0)
_filter = staticmethod(_filter)
def _test():
"""Miscillaneous tests:
Equality test. Can compare against dictionary or bag types.
>>> Bag.fromkeys("abacab") == {'a': 3, 'b': 2, 'c': 1}
True
>>> b, l = Bag.fromkeys([1, 2, 1, 3, 1]), [1, 1, 1, 3, 2]
>>> b == l
False
>>> b == Bag.fromkeys(l) == IntegerBag.fromkeys(l)
True
Tests for non-zero:
>>> b = Bag()
>>> bool(b)
False
>>> b += [0]
>>> bool(b)
True
"""
pass
if __name__ == "__main__":
import doctest
return doctest.testmod()
Diff to Previous Revision
--- revision 7 2013-07-19 21:07:46
+++ revision 8 2013-07-19 21:08:46
@@ -422,6 +422,7 @@
>>> bool(b)
True
"""
+ pass
if __name__ == "__main__":
import doctest