def processSaving(self, saving):
    """Apply one saving if the two paths it refers to can be joined.

    ``saving`` is a 4-tuple ``(k, i, j, s)``: ``k`` is the value recorded in
    ``self.used_savings``, ``i`` and ``j`` index ``self.paths``, and ``s`` is
    the amount subtracted from ``self.solution``.

    Nothing is changed when ``uf.joinable`` rejects the pair for the
    current ``self.capacity``.
    """
    saving_id, first, second, amount = saving
    path_a = self.paths[first]
    path_b = self.paths[second]

    if not uf.joinable(self.capacity, path_a, path_b):
        return

    uf.union(path_a, path_b)
    self.used_savings.append(saving_id)
    self.solution -= amount
def compute_mst(self):
    """Build a spanning forest over ``self.link_list`` with union-find.

    One disjoint-set node is created (via ``make_set``) for every key of
    ``self.datapaths``. Links are then visited in ``self.link_list`` order
    and a link is kept only when its endpoints are still in different sets.

    Returns:
        A list of ``(id, id)`` pairs. Every kept link appears twice, once in
        each direction.

    Raises:
        KeyError: if a link endpoint does not match the ``id`` of any node.
    """
    # Iterating the mapping yields its keys on both Python 2 and Python 3
    # (``iterkeys`` only exists on Python 2 dicts).
    node_set = [make_set(i) for i in self.datapaths]

    # Index nodes by id once instead of scanning node_set for every link
    # endpoint. setdefault keeps the first node for a given id, which is what
    # a linear first-match scan would have returned.
    node_by_id = {}
    for node in node_set:
        node_by_id.setdefault(node.id, node)

    # Resolve every endpoint before any union is performed.
    link_set = [(node_by_id[ln1], node_by_id[ln2])
                for (ln1, ln2) in self.link_list]

    mst = []
    for n1, n2 in link_set:
        if find(n1) is not find(n2):
            # Record the link in both directions.
            mst.append((n1, n2))
            mst.append((n2, n1))
            union(n1, n2)

    return [(n1.id, n2.id) for (n1, n2) in mst]
def main(filename='clustering1.txt', k=4):
    """Return the spacing of a ``k``-clustering computed Kruskal-style.

    The first line of the input file is the node count; every following
    non-blank line is ``label_a label_b distance`` with 1-based labels.
    Edges are merged in increasing order of distance.

    Args:
        filename: path of the edge list to read.
        k: desired number of clusters.

    Returns:
        The distance of the edge whose merge takes the cluster count from
        ``k`` to ``k - 1``, i.e. the smallest distance between two separate
        clusters once exactly ``k`` clusters remain. Returns 0 when the file
        already describes fewer than ``k`` nodes.

    Raises:
        IndexError: if the edges run out before the target is reached.
    """
    with open(filename, 'rt') as file:
        node_cnt = int(file.readline())  # First line is count of nodes
        lines = file.readlines()

    # Blank lines (e.g. a trailing newline at end of file) carry no edge.
    edges = [tuple(int(x) for x in line.split())
             for line in lines if line.strip()]

    # Sort edges in decreasing order of distance. Sorting in reverse
    # order lets us pop() the next smallest edge in O(1) time
    edges.sort(key=lambda x: -x[2])

    # Size the node list from the file header rather than assuming 500 nodes.
    nodes = [Node(label) for label in range(1, node_cnt + 1)]

    cluster_cnt = node_cnt
    distance = 0  # Assuming distance between nodes is always nonnegative

    # >= because we want the minimum distance between clusters AFTER
    # we only have k clusters
    while cluster_cnt >= k:
        # Get next smallest edge
        node_label_a, node_label_b, distance = edges.pop()
        node_a, node_b = nodes[node_label_a - 1], nodes[node_label_b - 1]
        if union(node_a, node_b):
            # node_a and node_b were in separate clusters that have
            # been merged by union()
            cluster_cnt -= 1

    return distance
def main():
    """Print how many clusters remain after joining all nodes within 2 bits.

    Reads ``clustering_big.txt``: the first line holds two integers (the
    node count and a second value that is not used here), and each
    following line is a whitespace-separated binary string. A node's label
    is its line index. Every pair of nodes that ``get_close_values`` reports
    as close becomes an edge, and the edges are merged with union-find
    (Kruskal's algorithm) until none remain.
    """
    with open('clustering_big.txt', 'rt') as file:
        node_cnt, _bits = [int(x) for x in file.readline().split()]
        bs_list = ["".join(line.split()) for line in file.readlines()]

    # Map each binary string to every label that carries it. The input file
    # contains duplicate strings, so one string may own several labels.
    labels_by_string = {}
    for label, bit_string in enumerate(bs_list):
        labels_by_string.setdefault(bit_string, []).append(label)

    # One union-find node per label.
    uf_nodes = [Node(i) for i in range(node_cnt)]

    # Edges are stored as (i, j, difference in bits) with i < j; using a set
    # keeps the edge found from either endpoint only once.
    edges = set()
    for cur_label, bit_string in enumerate(bs_list):
        for close_str in get_close_values(bit_string, max_diff=2):
            nearby_nodes = labels_by_string.get(close_str, [])
            if not nearby_nodes:
                continue
            distance = get_diff_bits(bit_string, close_str)
            for nearby_label in nearby_nodes:
                if nearby_label == cur_label:
                    continue  # never link a node to itself
                edges.add((min(cur_label, nearby_label),
                           max(cur_label, nearby_label),
                           distance))

    # Largest difference first, so walking the list backwards visits the
    # smallest edges first.
    ordered_edges = sorted(edges, key=lambda edge: edge[2], reverse=True)

    # Run Kruskal's algorithm over every edge.
    cluster_cnt = node_cnt
    for label_a, label_b, _distance in reversed(ordered_edges):
        if union(uf_nodes[label_a], uf_nodes[label_b]):
            # The two nodes were in separate clusters that union() merged.
            cluster_cnt -= 1

    # Report the number of clusters
    print("Cluster count: {}".format(cluster_cnt))
def min_spanning_tree(graph):
    """Compute a minimum spanning forest of ``graph`` with Kruskal's method.

    ``graph`` maps each vertex to a mapping ``{neighbour: weight}``.

    Returns:
        ``(tree, total_weight)``. ``tree`` has the same nested-mapping shape
        as ``graph``, with every selected edge stored in both directions.
        ``total_weight`` is the sum of the selected edge weights. Only
        vertices that take part in at least one edge appear in ``tree``.
    """
    vertices = set()
    edges = []
    for v1 in graph:
        neighbours = graph[v1]
        for v2 in neighbours:
            edges.append((neighbours[v2], v1, v2))
            vertices.add(v1)
            vertices.add(v2)

    # Descending order, so pop() hands back the lightest remaining edge.
    edges.sort(reverse=True)

    sets = dict.fromkeys(vertices)
    result = {v: {} for v in vertices}
    result_weight = 0

    while edges:
        weight, v1, v2 = edges.pop()
        root1 = union_find.find(sets, v1)
        root2 = union_find.find(sets, v2)
        if root1 != root2:
            # The edge joins two components: keep it.
            result[v1][v2] = weight
            result[v2][v1] = weight
            result_weight += weight
            union_find.union(sets, v1, v2)

    return result, result_weight
def compute(nodes, edges):
    """Union the endpoints of every edge and return the flattened parents.

    Args:
        nodes: sized collection; only ``len(nodes)`` is used, to create the
            initial sets via ``make_sets``.
        edges: sized sequence of text lines. The first two tab-separated
            fields of each non-empty line are the integer ids to union; any
            further fields are ignored.

    Returns:
        The structure returned by ``make_sets``, with every entry replaced
        by the result of ``find`` on it.

    Raises:
        ValueError: if a non-empty line lacks two integer fields.
    """
    parents = make_sets(len(nodes))

    bar = StatusBar(len(edges))
    counter = 0
    for line in edges:
        if not line:
            continue
        # partition() isolates the first two fields without index
        # arithmetic. Slicing up to a missing second tab (find() == -1)
        # would silently drop the last character of a line that has no
        # trailing newline.
        left_text, _, rest = line.partition("\t")
        right_text = rest.partition("\t")[0]
        # int() ignores surrounding whitespace, including the newline.
        left = int(left_text)
        right = int(right_text)
        union(parents, left, right)
        counter += 1
        if counter % 5000 == 0:
            bar.update(counter)
    bar.close()

    # Second pass: replace each entry with the result of find() on it.
    bar = StatusBar(len(parents))
    for counter, x in enumerate(parents):
        parents[counter] = find(parents, x)
        if counter % 10000 == 0:
            bar.update(counter)
    bar.close()

    return parents
def largest_k_clusters(file, k):
    """Return the maximum spacing of a ``k``-clustering of the graph in ``file``.

    Despite the name, the result is a spacing, not a set of clusters:
    it is the cost of the cheapest edge that still connects two different
    clusters once Kruskal-style merging has reduced the graph to ``k``
    clusters.

    Args:
        file: path of a text file whose first line is the node count and
            whose remaining non-blank lines are ``node1 node2 cost``
            (nodes are numbered from 1).
        k: target number of clusters.

    Returns:
        The cost of the first (cheapest) edge joining two distinct clusters.

    Raises:
        ValueError: if the edges run out before ``k`` clusters are reached,
            or if no edge separates two clusters afterwards (for example
            when ``k`` is 1).
    """
    with open(file) as f:
        n_nodes = int(f.readline())

        # create sorted list of edges
        # tuple in the form (cost, node1, node2)
        edges = []
        for line in f:
            edge = [int(x) for x in line.split()]
            if not edge:
                continue  # blank line, nothing to parse
            edges.append((edge[2], edge[0], edge[1]))
    edges.sort()

    # initialize subsets for union find
    subsets = {}
    for node in range(1, n_nodes + 1):
        subsets[node] = union_find.Subset(node)

    # One shared iterator: the spacing search below resumes where the
    # merging stopped. Every edge consumed by the merge loop already has
    # both endpoints in the same cluster, so it can never be the answer.
    remaining = iter(edges)

    # clustering algorithm based on Kruskal
    n_clusters = n_nodes
    while n_clusters != k:
        edge = next(remaining, None)
        if edge is None:
            raise ValueError(
                "cannot form %s clusters from %s nodes with the given edges"
                % (k, n_nodes))
        subset1 = union_find.find(subsets, edge[1])
        subset2 = union_find.find(subsets, edge[2])
        if subset1 != subset2:
            union_find.union(subsets, subset1, subset2)
            n_clusters -= 1

    # determine max spacing (cost of first edge with different subsets)
    for edge in remaining:
        subset1 = union_find.find(subsets, edge[1])
        subset2 = union_find.find(subsets, edge[2])
        if subset1 != subset2:
            return edge[0]

    raise ValueError(
        "no edge connects two different clusters; spacing is undefined")
# NOTE(review): the next two statements read `sets`, which is not defined in
# the visible part of the file. They look like the tail of a helper
# (presumably print_sets) whose header lies outside this view -- confirm
# their enclosing scope and indentation.
# Prints how many groups itertools.groupby yields for `sets`; groupby only
# merges adjacent equal items.
print('number of disjoint sets: %s' % (len([ i for i in itertools.groupby(sets) ])))
print()

# Smoke test of the union-find structure on seven labelled nodes.
# This rebinds the name `union_find` to a UnionFind instance, so whatever
# `union_find` referred to before is no longer reachable under that name.
union_find = union_find.UnionFind()
nodes = [ Node(ch) for ch in 'abcdefg' ]
print('labels: %s' % ([ str(i) for i in nodes ]))

# Register every node with the structure via make_set.
for node in nodes:
    union_find.make_set(node)
print_sets(nodes)

# Joining 'a' and 'c' must give them the same representative.
assert(union_find.find(nodes[0]) != union_find.find(nodes[2]))
union_find.union(nodes[0], nodes[2])
assert(union_find.find(nodes[0]) == union_find.find(nodes[2]))
print_sets(nodes)

# Joining 'a' and 'b' must also connect 'b' to 'c', since 'a' and 'c' were
# joined above.
assert(union_find.find(nodes[0]) != union_find.find(nodes[1]))
assert(union_find.find(nodes[1]) != union_find.find(nodes[2]))
union_find.union(nodes[0], nodes[1])
assert(union_find.find(nodes[0]) == union_find.find(nodes[1]))
assert(union_find.find(nodes[1]) == union_find.find(nodes[2]))
print_sets(nodes)

# The last two nodes ('f' and 'g') start apart and end up together.
assert(union_find.find(nodes[-2]) != union_find.find(nodes[-1]))
union_find.union(nodes[-2], nodes[-1])
assert(union_find.find(nodes[-2]) == union_find.find(nodes[-1]))
def run_union_find(onStepUpdate=None):
    """Randomly walk the maze until its first and last cells are connected.

    Starting at index 0, the walk repeatedly picks a random entry from
    ``find_next_steps`` for the current index and unions the two indices.
    It stops once ``connected(data, 0, size - 1)`` holds. When the current
    index has no next steps, the walk rewinds through the recorded steps to
    the closest earlier one that still has options, and gives up if there is
    none. Progress, and finally the coordinates visited, are printed.

    Args:
        onStepUpdate: optional callable invoked with a dict
            ``{"status": "NEXT_STEP" | "DEAD_END", "value": hashTable[index]}``
            for every move or dead end.
    """
    position = 0  # index in the data array
    steps = []

    while not connected(data, 0, size - 1):
        steps.append(position)
        candidates = find_next_steps(position)

        if len(candidates) > 0:
            # Normal move: pick a random candidate and join it to the path.
            chosen = candidates[randrange(len(candidates))]
            union(data, position, chosen)
            print("Iteration at index", position)
            if onStepUpdate:
                onStepUpdate({
                    "status": "NEXT_STEP",
                    "value": hashTable[position]
                })
            position = chosen
            continue

        # Dead end reached. Need to get back and look at previous
        # connections next steps.
        print(
            "Dead end at index:",
            position,
            "and coordinate:",
            hashTable[position],
        )
        if onStepUpdate:
            onStepUpdate({
                "status": "DEAD_END",
                "value": hashTable[position]
            })

        # Rewind from the step just before the first visit of this index
        # until a recorded step that still has somewhere to go.
        rewind = steps.index(position) - 1
        while rewind >= 0 and len(find_next_steps(steps[rewind])) == 0:
            rewind -= 1

        if rewind < 0:
            print("Could not find a route from start to end... :(")
            break

        print("Loogin for new route at index", steps[rewind])
        position = steps[rewind]

    print("Iteration at last index", size - 1)
    print("--------------------------------------------------------")

    # The last index of the array closes the recorded walk.
    steps.append(size - 1)
    step_coordinates = [hashTable[item] for item in steps]
    print("Iteration traversed the following coordinates:")
    print(step_coordinates)