Welcome, guest | Sign In | My Account | Store | Cart
```""" altsets.py -- An alternate implementation of Sets.py

Implements set operations using sorted lists as the underlying data structure.

* Space savings -- lists are much more compact than a dictionary
based implementation.

* Flexibility -- elements do not need to be hashable, only __cmp__
is required.

* Fast operations depending on the underlying data patterns.
Non-overlapping sets get united, intersected, or differenced
with only log(N) element comparisons.  Results are built using
fast-slicing.

* Algorithms are designed to minimize the number of compares
which can be expensive.

* Natural support for sets of sets.  No special accomodation needs to
be made to use a set or dict as a set member, but users need to be
careful to not mutate a member of a set since that may breaks its
sort invariant.

* Set construction uses list.sort() with potentially N log(N)
comparisons.

* Membership testing and element addition use log(N) comparisons.
Element addition uses list.insert() with takes O(N) time.

ToDo:

* Make the search routine adapt to the data; falling backing to
a linear search when encountering random data.

"""

from bisect import bisect_left, insort_left

class Set(object):

def __init__(self, iterable):
data = list(iterable)
data.sort()
result = data[:1]
for elem in data[1:]:
if elem == result[-1]:
continue
result.append(elem)
self.data = result

def __repr__(self):
return 'Set(' + repr(self.data) + ')'

def __iter__(self):
return iter(self.data)

def __contains__(self, elem):
data = self.data
i = bisect_left(self.data, elem, 0)
return i<len(data) and data[i] == elem

if elem not in self:
insort_left(self.data, elem)

def remove(self, elem):
data = self.data
i = bisect_left(self.data, elem, 0)
if i<len(data) and data[i] == elem:
del data[i]

def _getotherdata(other):
if not isinstance(other, Set):
other = Set(other)
return other.data
_getotherdata = staticmethod(_getotherdata)

def __cmp__(self, other, cmp=cmp):
return cmp(self.data, Set._getotherdata(other))

def union(self, other, find=bisect_left):
i = j = 0
x = self.data
y = Set._getotherdata(other)
result = Set([])
append = result.data.append
extend = result.data.extend
try:
while 1:
if x[i] == y[j]:
append(x[i])
i += 1
j += 1
elif x[i] > y[j]:
cut = find(y, x[i], j)
extend(y[j:cut])
j = cut
else:
cut = find(x, y[j], i)
extend(x[i:cut])
i = cut
except IndexError:
extend(x[i:])
extend(y[j:])
return result

def intersection(self, other, find=bisect_left):
i = j = 0
x = self.data
y = Set._getotherdata(other)
result = Set([])
append = result.data.append
try:
while 1:
if x[i] == y[j]:
append(x[i])
i += 1
j += 1
elif x[i] > y[j]:
j = find(y, x[i], j)
else:
i = find(x, y[j], i)
except IndexError:
pass
return result

def difference(self, other, find=bisect_left):
i = j = 0
x = self.data
y = Set._getotherdata(other)
result = Set([])
extend = result.data.extend
try:
while 1:
if x[i] == y[j]:
i += 1
j += 1
elif x[i] > y[j]:
j = find(y, x[i], j)
else:
cut = find(x, y[j], i)
extend(x[i:cut])
i = cut
except IndexError:
extend(x[i:])
return result

def symmetric_difference(self, other, find=bisect_left):
i = j = 0
x = self.data
y = Set._getotherdata(other)
result = Set([])
extend = result.data.extend
try:
while 1:
if x[i] == y[j]:
i += 1
j += 1
elif x[i] > y[j]:
cut = find(y, x[i], j)
extend(y[j:cut])
j = cut
else:
cut = find(x, y[j], i)
extend(x[i:cut])
i = cut
except IndexError:
extend(x[i:])
extend(y[j:])
return result

b = Set('alacazam')
print a < b
print a
print b
print map(a.__contains__, list('abcdr'))
print map(a.__contains__, list('0ey'))
print list(a)
print a.union(b), ' :union'
print b.union(a), ' :union'
print a.intersection(b), ' :intersection'
print a.difference(b), ' :difference'
print b.difference(a), ' :difference'
print a.symmetric_difference(b), ' :symmetric_difference'
print b.symmetric_difference(a), ' :symmetric_difference'
print a.intersection(b).union(a.symmetric_difference(b)) == a.union(b)
print a.intersection(b).intersection(a.symmetric_difference(b)) == Set([])
```

### History

• revision 2 (16 years ago)
• previous revisions are not available