""" altsets.py -- An alternate implementation of Sets.py Implements set operations using sorted lists as the underlying data structure. Advantages: * Space savings -- lists are much more compact than a dictionary based implementation. * Flexibility -- elements do not need to be hashable, only __cmp__ is required. * Fast operations depending on the underlying data patterns. Non-overlapping sets get united, intersected, or differenced with only log(N) element comparisons. Results are built using fast-slicing. * Algorithms are designed to minimize the number of compares which can be expensive. * Natural support for sets of sets. No special accomodation needs to be made to use a set or dict as a set member, but users need to be careful to not mutate a member of a set since that may breaks its sort invariant. Disadvantages: * Set construction uses list.sort() with potentially N log(N) comparisons. * Membership testing and element addition use log(N) comparisons. Element addition uses list.insert() with takes O(N) time. ToDo: * Make the search routine adapt to the data; falling backing to a linear search when encountering random data. """ from bisect import bisect_left, insort_left class Set(object): def __init__(self, iterable): data = list(iterable) data.sort() result = data[:1] for elem in data[1:]: if elem == result[-1]: continue result.append(elem) self.data = result def __repr__(self): return 'Set(' + repr(self.data) + ')' def __iter__(self): return iter(self.data) def __contains__(self, elem): data = self.data i = bisect_left(self.data, elem, 0) return i<len(data) and data[i] == elem def add(self, elem): if elem not in self: insort_left(self.data, elem) def remove(self, elem): data = self.data i = bisect_left(self.data, elem, 0) if i<len(data) and data[i] == elem: del data[i] def _getotherdata(other): if not isinstance(other, Set): other = Set(other) return other.data _getotherdata = staticmethod(_getotherdata) def __cmp__(self, other, cmp=cmp): return cmp(self.data, Set._getotherdata(other)) def union(self, other, find=bisect_left): i = j = 0 x = self.data y = Set._getotherdata(other) result = Set([]) append = result.data.append extend = result.data.extend try: while 1: if x[i] == y[j]: append(x[i]) i += 1 j += 1 elif x[i] > y[j]: cut = find(y, x[i], j) extend(y[j:cut]) j = cut else: cut = find(x, y[j], i) extend(x[i:cut]) i = cut except IndexError: extend(x[i:]) extend(y[j:]) return result def intersection(self, other, find=bisect_left): i = j = 0 x = self.data y = Set._getotherdata(other) result = Set([]) append = result.data.append try: while 1: if x[i] == y[j]: append(x[i]) i += 1 j += 1 elif x[i] > y[j]: j = find(y, x[i], j) else: i = find(x, y[j], i) except IndexError: pass return result def difference(self, other, find=bisect_left): i = j = 0 x = self.data y = Set._getotherdata(other) result = Set([]) extend = result.data.extend try: while 1: if x[i] == y[j]: i += 1 j += 1 elif x[i] > y[j]: j = find(y, x[i], j) else: cut = find(x, y[j], i) extend(x[i:cut]) i = cut except IndexError: extend(x[i:]) return result def symmetric_difference(self, other, find=bisect_left): i = j = 0 x = self.data y = Set._getotherdata(other) result = Set([]) extend = result.data.extend try: while 1: if x[i] == y[j]: i += 1 j += 1 elif x[i] > y[j]: cut = find(y, x[i], j) extend(y[j:cut]) j = cut else: cut = find(x, y[j], i) extend(x[i:cut]) i = cut except IndexError: extend(x[i:]) extend(y[j:]) return result a = Set('abracadabra') b = Set('alacazam') print a < b print a print b print map(a.__contains__, list('abcdr')) print map(a.__contains__, list('0ey')) print list(a) print a.union(b), ' :union' print b.union(a), ' :union' print a.intersection(b), ' :intersection' print a.difference(b), ' :difference' print b.difference(a), ' :difference' print a.symmetric_difference(b), ' :symmetric_difference' print b.symmetric_difference(a), ' :symmetric_difference' print a.intersection(b).union(a.symmetric_difference(b)) == a.union(b) print a.intersection(b).intersection(a.symmetric_difference(b)) == Set([])