from operator import itemgetter
from heapq import nlargest
class bag(object):
def __init__(self, iterable=()):
self._data = {}
self._len = 0
self.update(iterable)
def update(self, iterable):
if isinstance(iterable, dict):
for elem, n in iterable.iteritems():
self[elem] += n
else:
for elem in iterable:
self[elem] += 1
def __contains__(self, elem):
return elem in self._data
def __getitem__(self, elem):
return self._data.get(elem, 0)
def __setitem__(self, elem, n):
self._len += n - self[elem]
self._data[elem] = n
if n == 0:
del self._data[elem]
def __delitem__(self, elem):
self._len -= self[elem]
del self._data[elem]
def __len__(self):
assert self._len == sum(self._data.itervalues())
return self._len
def __eq__(self, other):
if not isinstance(other, bag):
return False
return self._data == other._data
def __ne__(self, other):
if not isinstance(other, bag):
return True
return self._data != other._data
def __hash__(self):
raise TypeError
def __repr__(self):
return 'bag(%r)' % self._data
def copy(self):
return self.__class__(self)
__copy__ = copy # For the copy module
def __deepcopy__(self, memo):
from copy import deepcopy
result = self.__class__()
memo[id(self)] = result
data = result._data
result._data = deepcopy(self._data, memo)
result._len = self._len
return result
def __getstate__(self):
return self._data.copy(), self._len
def __setstate__(self, data):
self._data = data[0].copy()
self._len = data[1]
def clear(self):
self._data.clear()
self._len = 0
def __iter__(self):
for elem, cnt in self._data.iteritems():
for i in xrange(cnt):
yield elem
def iterunique(self):
return self._data.iterkeys()
def itercounts(self):
return self._data.iteritems()
def mostcommon(self, n=None):
if n is None:
return sorted(self.itercounts(), key=itemgetter(1), reverse=True)
it = enumerate(self.itercounts())
nl = nlargest(n, ((cnt, i, elem) for (i, (elem, cnt)) in it))
return [(elem, cnt) for cnt, i, elem in nl]