def simpleTests():
    """Run a fixed sequence of sanity checks against UnionFind.

    Ten singleton sets (0..9) are created and then merged step by step.
    After each step the representative returned by ``find``, the group
    count, and selected entries of ``uf.data`` are checked.  Going by the
    assertion messages, ``data[x][0]`` is read as the stored parent of
    ``x`` and ``data[x][1]`` as its order.

    Raises:
        AssertionError: if any of the checks fails.
    """
    elements = range(10)
    sets = UnionFind()
    for item in elements:
        sets.makeSet(item)

    # A freshly created set is its own representative.
    for item in elements:
        representative = sets.find(item)
        assert representative == item, "Parent not initialized correctly."
    assert sets.countGroups() == 10, "Counted wrong number of groups."

    # Merging two singletons: 0 is expected to become the root with order 1.
    sets.union(0, 1)
    assert sets.find(1) == 0, "Parent not updated correctly."
    assert sets.data[0][1] == 1, "Order not updated for equal trees correctly."
    assert sets.countGroups() == 9, "Counted wrong number of groups."

    # Merging a singleton into the existing pair must leave the order at 1.
    sets.union(1, 2)
    assert sets.find(2) == 0, "Parent not updated correctly."
    assert sets.data[0][1] == 1, "Order not updated for unequal trees correctly."
    assert sets.countGroups() == 8, "Counted wrong number of groups."

    # Build a second tree {3, 4, 5} and attach it to the first one.
    for left, right in ((3, 4), (4, 5), (0, 3)):
        sets.union(left, right)
    assert sets.data[0][1] == 2, "Order not updated for unequal trees."

    # The stored parent of 5 is checked before and after the find() call:
    # it is expected to change from 3 to 0 only once find(5) has run.
    assert sets.data[5][0] == 3, "Parent should not be updated until find operation."
    assert sets.find(5) == 0, "Find operation returned wrong parent."
    assert sets.data[5][0] == 0, "Parent should have been updated."
    assert sets.countGroups() == 5, "Counted wrong number of groups."
class Kruskal:
    """Kruskal's algorithm over a weighted edge list, backed by a UnionFind.

    Attributes:
        ufSet: UnionFind instance holding one set per node (ids 0..nodes-1).
        edges: list of ``(u, v, weight)`` tuples sorted by ascending weight.
        mst:   list of edges chosen by the last ``mstKruskal`` call (only
               present after that method has run).

    Note: both ``mstKruskal`` and ``clusterKruskal`` merge sets inside the
    shared ``ufSet``, so each instance is meant for a single run.
    """

    def __init__(self, data):
        """Parse the graph description and create one singleton set per node.

        Args:
            data: sequence of text lines.  The first whitespace-separated
                token of ``data[0]`` is the node count.  Every following
                non-blank line holds three integers ``u v w``; ``u`` and
                ``v`` are decremented by one on storage (presumably the
                input is 1-based), ``w`` is the edge weight.
        """
        nodes = int(data[0].split()[0])
        self.ufSet = UnionFind()
        for n in range(nodes):
            self.ufSet.makeSet(n)

        self.edges = []
        for line in data[1:]:
            # Build a real list: on Python 3 ``map`` returns a lazy iterator
            # that cannot be indexed.
            row = [int(token) for token in line.split()]
            if not row:
                # Tolerate blank lines (e.g. a trailing newline in the file).
                continue
            self.edges.append((row[0] - 1, row[1] - 1, row[2]))
        self.edges.sort(key=itemgetter(2))

    def mstKruskal(self):
        """Build a minimum spanning tree (forest, if disconnected).

        Edges are visited in ascending weight order and kept whenever their
        endpoints are still in different sets.  The chosen edges are stored
        in ``self.mst``.

        Returns:
            The sum of the weights of the chosen edges.
        """
        mst = []
        total = 0
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                # Both endpoints already connected; the edge would close a cycle.
                continue
            self.ufSet.union(edge[0], edge[1])
            mst.append(edge)
            total += edge[2]
        self.mst = mst
        return total

    def clusterKruskal(self, k):
        """Merge the closest groups until ``k`` remain and report the spacing.

        Args:
            k: desired number of groups.

        Returns:
            The weight of the cheapest edge that still joins two different
            groups once exactly ``k`` groups remain, or ``None`` when no such
            edge is found (for instance when the group count never equals
            ``k`` or no crossing edge is left).
        """
        print("Running Clustering, k={0}".format(k))
        for edge in self.edges:
            if self.ufSet.find(edge[0]) == self.ufSet.find(edge[1]):
                continue
            # Check the group count *before* merging so that a request for as
            # many groups as there currently are does not merge anything.
            if self.ufSet.countGroups() == k:
                print("Smallest unallocated edge: {0}".format(edge))
                return edge[2]
            self.ufSet.union(edge[0], edge[1])
        return None
class ClusterHamming:
    """Cluster nodes labelled with bit strings by Hamming distance.

    Attributes:
        bits:        number of bits per label (second number of the header).
        uf:          UnionFind instance with one set per node id.
        hammingData: maps each label (whitespace removed) to the list of node
                     ids carrying that label.
    """

    def __init__(self, data):
        """Parse the input lines and index the nodes by label.

        Args:
            data: sequence of text lines.  ``data[0]`` holds exactly two
                integers: the node count and the number of bits.  The next
                ``nodes`` lines each hold one label; any whitespace inside
                a label line is removed.  The sequence is not modified.

        Raises:
            IndexError: if fewer label lines than announced are present.
        """
        # Read the header in place rather than popping it, so the caller's
        # list stays intact and can be reused.
        nodes, self.bits = [int(token) for token in data[0].split()]
        self.uf = UnionFind()
        for n in range(nodes):
            self.uf.makeSet(n)

        self.hammingData = defaultdict(list)
        for n in range(nodes):
            # split()/join drops spaces *and* a trailing newline, so labels
            # compare equal regardless of how the line was terminated.
            label = ''.join(data[n + 1].split())
            self.hammingData[label].append(n)

    def flip(self, s, flipbits):
        """Return a copy of ``s`` with the bits at the given indices flipped.

        Args:
            s: input string; ``'1'`` becomes ``'0'`` and any other character
                at a flipped position becomes ``'1'``.
            flipbits: iterable of indices to flip.  Indices outside ``s``
                are ignored.  For distinct in-range indices its length is
                the Hamming distance between ``s`` and the result.

        Returns:
            The new string.
        """
        positions = frozenset(flipbits)
        chars = []
        for i, c in enumerate(s):
            if i in positions:
                chars.append('0' if c == '1' else '1')
            else:
                chars.append(c)
        return ''.join(chars)

    def getHammingPermutations(self, s, n):
        """List ``s`` plus every variant obtained by flipping 1..n bits.

        Bit positions are drawn from ``range(self.bits)``.

        Args:
            s: the reference label.
            n: maximum number of bits to flip.

        Returns:
            A list starting with ``s`` itself, followed by the variants in
            order of increasing number of flipped bits.
        """
        result = [s]
        for d in range(1, n + 1):
            result.extend(
                self.flip(s, flipbits)
                for flipbits in combinations(range(self.bits), d)
            )
        return result

    def printSummary(self):
        """Print every cluster followed by the labels that belong to it.

        A label is assigned to the cluster of its first node.  ``run`` merges
        all nodes sharing a label, so after ``run`` that choice is arbitrary.
        """
        resultMap = defaultdict(list)
        for label, nodeIds in self.hammingData.items():
            # find() expects a single node id, not the whole list of ids.
            cluster = self.uf.find(nodeIds[0])
            resultMap[cluster].append(label)

        for cluster, labels in resultMap.items():
            print("\n\nCluster {0}:".format(cluster))
            for label in labels:
                print("\t{0}".format(label))

    def run(self, minDist):
        """Merge all nodes whose labels differ in fewer than ``minDist`` bits.

        Args:
            minDist: labels at Hamming distance ``minDist - 1`` or less end
                up in the same set.

        Returns:
            The number of groups reported by the union-find afterwards.
        """
        # Shallow working copy: labels are removed from it as they are
        # processed, so each pair of labels is compared only once.
        pending = dict(self.hammingData)
        while pending:
            nodeKey, refNodes = pending.popitem()
            anchor = refNodes[0]

            # Nodes with an identical label are at distance zero.
            for other in refNodes[1:]:
                self.uf.union(anchor, other)

            for testNodeKey in self.getHammingPermutations(nodeKey, minDist - 1):
                if testNodeKey not in pending:
                    # Either no node has this label or it was handled already.
                    continue
                for n in self.hammingData[testNodeKey]:
                    if self.uf.find(n) == self.uf.find(anchor):
                        continue
                    self.uf.union(anchor, n)
        return self.uf.countGroups()