This is inspired by Raymond Hettingers recipe for sets, using sorted lists. However here I'm not sorting the original list but just the indices and the sets share a common universe. This code is not for production use, I just show it in order to explain what I want to do.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | from array import array
class VSort:
def __init__(self):
self.atoms = []
self.unranks = array('L',[])
def rank(self,x):
a = self.atoms
b = self.unranks
n = len(a)
i = self.bisect_left(x)
if i == n or a[b[i]] != x:
a.append(x)
b.insert(i,n)
return b[i]
def bisect_left(self,x, lo=0, hi=None):
a = self.atoms
b = self.unranks
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[b[mid]] < x:
lo = mid+1
else:
hi = mid
return lo
def __contains__(self,x):
a = self.atoms
b = self.unranks
n = len(a)
i = self.bisect_left(x)
return i < n and a[b[i]] == x
def __len__(self):
return len(self.atoms)
def items(self):
a = self.atoms
b = self.unranks
return [a[i] for i in b]
def __iter__(self):
return iter(self.items())
def remove(self,x):
a = self.atoms
b = self.unranks
n = len(a)
i = self.bisect_left(x)
y = b[i]
if i < n and a[y] == x:
del a[y]
del b[i]
for j,k in enumerate(b):
if k > y:
b[j] -= 1
U = VSort()
class Set:
def __init__(self,seq):
self.V = VSort()
for x in seq:
self.V.rank(U.rank(x))
def __repr__(self):
return 'Set(' + repr(self.items()) + ')'
def __len__(self):
return len(self.V)
def __iter__(self):
return iter(self.items())
def items(self):
V = VSort()
for i in self.V:
V.rank(U.atoms[i])
return V.items()
def union(self,other):
return Set(self.items()+other.items())
def __contains__(self,x):
return U.rank(x) in self.V
def add(self,x):
self.V.rank(U.rank(x))
def remove(self,x):
self.V.remove(U.rank(x))
def intersection(self,other):
V = VSort()
a = self.V
b = other.V
if len(a) > len(b):
a,b = b,a
for x in a:
if x in b:
V.rank(x)
return Set([U.atoms[i] for i in V])
def difference(self,other):
V = VSort()
for x in self.V:
if x not in other.V:
V.rank(x)
return Set([U.atoms[i] for i in V])
def test():
S1 = Set('adjhdjgfyhrfjhsdf')
S2 = Set('12748134346')
print S1.union(S2)
print '1' in S1
S1.add('1')
print '1' in S1
S1.remove('1')
print '1' in S1
S3 = Set('adj')
print S1
print S1.intersection(S3)
print S1.difference(S3)
S4 = Set([S1,S2,S3])
print S4
if __name__=='__main__':
test()
|
It's slow but I think its beautyful because it reuses vsort in more ways. Also it should be memory efficient because it represents sets using lists of integers. Maybe someday this can be used in pypy to implement sets without using dictionaries.
Python 2.5 sets are implemented as a dictionary variant without values (not even references to None). See setobject.c, available here: http://svn.python.org/view/python/trunk/Objects/setobject.c?rev=41434&view=auto
If you're looking for an API/idea similar to this one, check out the sortedcontainers module. It implements sorted list, sorted dict, and sorted set data types in pure-Python and is fast-as-C implementations (even faster!). Learn more about sortedcontainers, available on PyPI and github.