''' unionfind.py A class that implements the Union Find data structure and algorithm. This data structure allows one to find out which set an object belongs to, as well as join two sets. The algorithm's performance, given m union/find operations of any ordering, on n elements has been shown to take log* time per operation, where log* is pronounced log-star, and is the INVERSE of what is known as the Ackerman function, which is given below: A(0) = 1 A(n) = 2**A(n-1) I include the functions to be complete. Note that we can be 'inefficient' when performing the inverse ackerman function, as it will only take a maximum of 6 iterations to perform; A(5) is 65536 binary digits long (a 1 with 65535 zeroes following). A(6) is 2**65536 binary digits long, and cannot be represented by the memory of the entire universe. The Union Find data structure is not a universal set implementation, but can tell you if two objects are in the same set, in different sets, or you can combine two sets. ufset.find(obja) == ufset.find(objb) ufset.find(obja) != ufset.find(objb) ufset.union(obja, objb) This algorithm and data structure are primarily used for Kruskal's Minimum Spanning Tree algorithm for graphs, but other uses have been found. August 12, 2003 Josiah Carlson ''' def Ackerman(inp, memo={0:1}): inp = max(int(inp), 0) if inp in memo: return memo[inp] elif inp <= 5: memo[inp] = 2**ackerman(inp-1) return memo[inp] else: print "Such a number is not representable by all the subatomic\nparticles in the universe." ackerman(4); out = (inp-4)*"2**" + str(memo[4]) print out raise Exception, "NumberCannotBeRepresentedByAllSubatomicParticlesInUniverse" def inverseAckerman(inp): t = 0 while Ackerman(t) < inp: t += 1 return t class UnionFind: def __init__(self): '''\ Create an empty union find data structure.''' self.num_weights = {} self.parent_pointers = {} self.num_to_objects = {} self.objects_to_num = {} self.__repr__ = self.__str__ def insert_objects(self, objects): '''\ Insert a sequence of objects into the structure. All must be Python hashable.''' for object in objects: self.find(object); def find(self, object): '''\ Find the root of the set that an object is in. If the object was not known, will make it known, and it becomes its own set. Object must be Python hashable.''' if not object in self.objects_to_num: obj_num = len(self.objects_to_num) self.num_weights[obj_num] = 1 self.objects_to_num[object] = obj_num self.num_to_objects[obj_num] = object self.parent_pointers[obj_num] = obj_num return object stk = [self.objects_to_num[object]] par = self.parent_pointers[stk[-1]] while par != stk[-1]: stk.append(par) par = self.parent_pointers[par] for i in stk: self.parent_pointers[i] = par return self.num_to_objects[par] def union(self, object1, object2): '''\ Combine the sets that contain the two objects given. Both objects must be Python hashable. If either or both objects are unknown, will make them known, and combine them.''' o1p = self.find(object1) o2p = self.find(object2) if o1p != o2p: on1 = self.objects_to_num[o1p] on2 = self.objects_to_num[o2p] w1 = self.num_weights[on1] w2 = self.num_weights[on2] if w1 < w2: o1p, o2p, on1, on2, w1, w2 = o2p, o1p, on2, on1, w2, w1 self.num_weights[on1] = w1+w2 del self.num_weights[on2] self.parent_pointers[on2] = on1 def __str__(self): '''\ Included for testing purposes only. All information needed from the union find data structure can be attained using find.''' sets = {} for i in xrange(len(self.objects_to_num)): sets[i] = [] for i in self.objects_to_num: sets[self.objects_to_num[self.find(i)]].append(i) out = [] for i in sets.itervalues(): if i: out.append(repr(i)) return ', '.join(out) if __name__ == '__main__': print "Testing..." uf = UnionFind() az = "abcdefghijklmnopqrstuvwxyz" az += az.upper() uf.insert_objects(az) import random cnt = 0 while len(uf.num_weights) > 20: cnt += 1 uf.union(random.choice(az), random.choice(az)) print uf, cnt print "Testing complete."