def two():
    """
    Calculate the largest value k such that there is a k-clustering with
    spacing >= 3.

    Vertex labels are read from 'clustering_big.txt' (each one passed through
    the ``strip`` converter). Every pair of vertices that ``nearby`` reports
    as close is merged, and the number of remaining leaders is returned.

    :return: the value of ``uf.numleaders()`` once all merges are done
    """
    # Read in the file, converting to str of binary number
    vert_df = pd.read_csv('clustering_big.txt', header=0, names=['vid'],
                          converters={'vid': strip})
    vertices = vert_df['vid'].unique()
    uf = UnionFind(vertices)
    to_visit = set(vertices)

    while to_visit:
        # Every iteration pulls off a new vertex label and attempts to merge
        # it with all not-yet-visited vertices connected at cost <= 2.
        # NOTE(review): assumes nearby(v, pool) returns the members of pool
        # within cost 2 of v -- confirm against its definition.
        this_v = to_visit.pop()
        for v in nearby(this_v, to_visit):
            if uf[this_v] != uf[v]:
                uf.union(this_v, v)
            # A merged neighbour must stay in to_visit: it still has to be
            # popped later so that its own neighbours are examined.  Removing
            # it here would miss chains such as a-b-c where a and c are only
            # linked through b.
    return uf.numleaders()
def test_union_with_invalid_values(self):
    """union() must raise ValueError for the ids -1 and 11 on a size-10 structure."""
    uf = UnionFind(10)
    for bad_id in (-1, 11):
        with self.assertRaises(ValueError):
            uf.union(bad_id, 1)
def test_same_set_with_invalid_values(self):
    """same_set() must raise ValueError for the ids -1 and 11 on a size-10 structure."""
    uf = UnionFind(10)
    for first, second in ((-1, 1), (11, 0)):
        with self.assertRaises(ValueError):
            uf.same_set(first, second)
def weak_connected_components(nodes):
    """
    Group the nodes of a directed graph by ignoring edge direction.

    @param {DirectedGraphNode[]} nodes: an array of directed graph nodes, each
        exposing ``label`` and ``neighbors``
    @return: the result of ``transform_to_cluster`` applied to the union-find
        (documented by the original author as {int[][]})
    """
    labels = [vertex.label for vertex in nodes]
    union_find = UnionFind(labels)
    for vertex in nodes:
        for successor in vertex.neighbors:
            # Direction is irrelevant: both endpoints end up in one set.
            union_find.union(vertex.label, successor.label)
    return transform_to_cluster(union_find)
def test_union_when_already_united(self):
    """Uniting two members of the same group must leave their leader at 1."""
    uf = UnionFind([[1, 2, 3], [4, 5], [6, 7, 8, 9, 0]])
    same_group = (1, 2)
    for member in same_group:
        self.assertEqual(uf.find(member), 1)
    uf.union(*same_group)
    for member in same_group:
        self.assertEqual(uf.find(member), 1)
def __mst(self):
    """
    Run Kruskal's algorithm over ``self.__sorted_edges()``.

    Each accepted edge is appended to ``self.mst`` as ``(u, v)`` and its
    weight is added to ``self.w``.
    """
    uf = UnionFind(self.graph.V())
    for u, v, w in self.__sorted_edges():
        if uf.connected(u, v):
            # Both endpoints already share a component: the edge closes a cycle.
            continue
        self.mst.append((u, v))
        uf.union(u, v)
        self.w += w
def Kruskal():
    """
    Return the total weight accumulated by Kruskal's algorithm.

    Reads the module-level names ``es`` (edges, weight at index 2), ``V``
    (passed to UnionFind) and ``E`` (number of edges to scan).
    """
    edges = sorted(es, key=lambda edge: edge[2])
    s = UnionFind(V)
    res = 0
    for idx in xrange(E):
        candidate = edges[idx]
        u, v = candidate[0], candidate[1]
        if s.same(u, v):
            continue
        s.union(u, v)
        res += candidate[2]
    return res
def test_union(self):
    """Union element i with partners[i] and check the resulting ids and representative."""
    uf = UnionFind(list(range(10)))
    partners = [0, 2, 3, 3, 5, 5, 5, 5, 5, 5]
    for element, partner in enumerate(partners):
        uf.union(element, partner)
    expected_ids = [0, 1, 1, 1, 4, 4, 4, 4, 4, 4]
    self.assertEqual(uf.ids, expected_ids, msg='Union Find merge incorrect')
    self.assertEqual(
        uf.find(2), 1,
        msg='Union find incorrect finds the representative class')
def community_size_distribution(vertices, edges):
    """
    Build a histogram of community sizes.

    @vertices and the endpoints in @edges must use reduced ids (per the
    original author's note). Every edge ``(a, b)`` merges two communities.

    :return: dict mapping community size -> number of communities of that size
    """
    communities = UnionFind(len(vertices))
    for a, b in edges:
        communities.union(a, b)

    trees = communities.get_trees()
    distro = {}
    for root in trees:
        members = len(trees[root])
        distro[members] = distro.get(members, 0) + 1
    return distro
def one():
    """
    Return the spacing of the K-clustering of the graph in 'clustering1.txt'.

    Runs Kruskal's algorithm over the edges in order of increasing cost until
    only ``K`` (module-level) groups remain, then returns the cost of the next
    edge whose endpoints lie in different groups.

    :raises StopIteration: if the edges run out before such an edge is found
    """
    edge_df = pd.read_csv('clustering1.txt', sep=" ", header=0,
                          names=['v1', 'v2', 'cost'])
    # Sort in order of least cost.  DataFrame.sort() no longer exists in
    # pandas; sort_values() is the supported replacement.
    edge_df.sort_values(['cost'], inplace=True)
    edge_df.index = range(1, len(edge_df) + 1)

    # Initialize the UnionFind structure with the unmerged set of unique vertices
    vertices = pd.concat([edge_df['v1'], edge_df['v2']]).unique()
    uf = UnionFind(vertices)

    # Iterate through Kruskal's until the number of groups is K
    it = edge_df.iterrows()
    while uf.numleaders() > K:
        rec = next(it)[1]
        v1, v2 = rec['v1'], rec['v2']
        if uf[v1] != uf[v2]:
            uf.union(v1, v2)

    # The next edge joining two different groups is the one that would form
    # the (K-1)-clustering; its cost is the spacing.
    while True:
        rec = next(it)[1]
        if uf[rec['v1']] != uf[rec['v2']]:
            return rec['cost']
def minSwapsCouples(self, row):
    """
    Return the minimum number of swaps so that every couple sits together.

    Persons ``2c`` and ``2c + 1`` form couple ``c``. The two couples found on
    each pair of adjacent seats are united; the answer is the number of
    couples minus the number of resulting groups.

    :type row: List[int]
    :rtype: int
    """
    # Floor division keeps everything an int on Python 3 as well; with "/"
    # N becomes a float there and range(N) raises TypeError.
    N = len(row) // 2
    uf = UnionFind(N)
    for i in range(N):
        x, y = row[2 * i], row[2 * i + 1]
        uf.union(x // 2, y // 2)
    return N - uf.num_groups
def kruskal_mst(graph):
    """
    Lazily yield the edges chosen by Kruskal's algorithm.

    Edges are visited in their natural sort order; an edge is yielded only
    when its endpoints are not yet connected.
    """
    union_find = UnionFind(graph.num_vertices)
    for e in sorted(graph.edges()):
        v = e.either()
        w = e.other(v)
        if not union_find.connected(v, w):
            union_find.union(v, w)
            yield e
def kruskal(graph):
    """
    Build a minimum-cost tree by popping ``(cost, edge)`` entries off a heap.

    The list returned by ``graph.get_all_edges()`` is heapified in place and
    drained. An edge is kept when ``is_valid_edge`` accepts it.

    :return: tuple ``(total_cost, min_cost_tree)``
    """
    total_cost = 0
    min_cost_tree = Graph()
    connected_components = UnionFind(graph.get_all_vertices())

    edges_queue = graph.get_all_edges()
    heapq.heapify(edges_queue)
    while edges_queue:
        cost, edge = heapq.heappop(edges_queue)
        if not is_valid_edge(edge, min_cost_tree, connected_components):
            continue
        total_cost += cost
        start, end = edge[0], edge[1]
        min_cost_tree.add_edge(start, end, cost)
        connected_components.union(start, end)
    return total_cost, min_cost_tree
def kruskals(N, edges):
    """Return the edges of a minimum spanning tree found with Kruskal's algorithm.

    N (int): the number of nodes in the graph
    edges [(int, int, float)]: the edges as (node1, node2, weight) where
        0 <= node < N; the list is sorted in place by weight

    Returns False when fewer than N - 1 edges were accepted (graph not connected).
    """
    UF = UnionFind(list(range(N)))
    edges.sort(key=lambda edge: edge[2])
    # An edge belongs to the tree exactly when union() reports a merge.
    mst = [edge for edge in edges if UF.union(edge[0], edge[1])]
    if len(mst) < N - 1:
        return False
    return mst
def kruskal(self, k=4):
    """
    Merge clusters along the cheapest edges and return the resulting spacing.

    ``self.triplets`` (entries ``(u, v, w)``) is sorted in place. Edges are
    accepted until ``self.num_nodes - 1 - k`` of them have been taken; the
    spacing is the smallest weight among triplets whose endpoints have
    different ``uf.children`` entries.
    """
    def compute_spacing(uf):
        # Smallest weight of an edge that still crosses two clusters.
        min_space = float('inf')
        for u, v, w in self.triplets:
            if uf.children[u] != uf.children[v]:
                min_space = min(min_space, w)
        return min_space

    self.triplets.sort(key=lambda x: (x[2], x[0], x[1]))
    uf = UnionFind()
    for node in self.nodes:
        uf.children[node] = node
        uf.leaders[node] = [node]

    mst = []
    idx = 0
    target = self.num_nodes - 1 - k
    while len(mst) < target:
        u, v = self.triplets[idx][:2]
        lead_u, lead_v = uf.children[u], uf.children[v]
        if lead_u != lead_v:
            # The leader with the larger (or equal) group is passed first.
            if len(uf.leaders[lead_u]) >= len(uf.leaders[lead_v]):
                uf.union(lead_u, lead_v)
            else:
                uf.union(lead_v, lead_u)
            mst.append(self.triplets[idx])
        idx += 1
    return compute_spacing(uf)
def solution(n, library, road, edges):
    """
    Return the total cost ``roads_built * road + components * library``.

    When a road costs at least as much as a library the answer is simply
    ``n * library``. Otherwise each edge (1-based endpoints) that merges two
    components counts as one road.
    """
    if road >= library:
        return n * library

    uf = UnionFind(n)
    road_count = 0
    for edge in edges:
        merged = uf.union(edge[0] - 1, edge[1] - 1)
        if merged:
            road_count += 1
            if uf.count == 1:
                # Everything is connected; further edges cannot help.
                break
    return road_count * road + uf.count * library
def kruskal_mst(g, num_nodes):
    """
    Return the total cost of the spanning tree built by Kruskal's algorithm.

    ``g`` is a list of ``(cost, u, v)`` entries and is sorted in place.
    Scanning continues until ``uf.size()`` drops to 1.
    """
    g.sort()
    uf = UnionFind(num_nodes)
    total_cost = 0
    position = 0
    while uf.size() > 1:
        cost, u, v = g[position]
        position += 1
        if uf.union(u, v):
            total_cost += cost
    return total_cost
def findCircleNum(self, M):
    """
    Count the groups in the adjacency matrix ``M``.

    Only the upper triangle is scanned; an entry equal to 1 unites i and j.

    :type M: List[List[int]]
    :rtype: int
    """
    n = len(M)
    uf = UnionFind(n)
    for i in range(n):
        for j in range(i + 1, n):
            if M[i][j] != 1:
                continue
            uf.union(i, j)
    return uf.num_groups
def test_connected():
    """connected() must reflect each union, including transitive links."""
    union = UnionFind(3)
    for a, b in ((0, 1), (1, 2)):
        assert_equals(union.connected(a, b), False)

    union.union(0, 1)
    assert_equals(union.connected(0, 1), True)
    assert_equals(union.connected(1, 2), False)

    union.union(1, 2)
    for a, b in ((2, 0), (0, 2), (1, 2)):
        assert_equals(union.connected(a, b), True)
def main(argv):
    """
    Cluster the vertices read from ``argv[0]`` and print the cluster count.

    Each vertex is indexed as ``[0]`` (value given to hamming_distance) and
    ``[1]`` (union-find id). Every pair is compared; pairs at Hamming
    distance <= 2 are merged.
    """
    vtotal, vertices = construct(argv[0])
    vertices = sorted(vertices)
    uf = UnionFind(vtotal)
    for i in xrange(vtotal):
        for j in xrange(i + 1, vtotal):
            left, right = vertices[i], vertices[j]
            if uf.connected(left[1], right[1]):
                # Already in one cluster: skip the distance computation.
                continue
            if hamming_distance(left[0], right[0]) <= 2:
                uf.union(left[1], right[1])
    print('')
    print('%s clusters' % (uf.count()))
    print('')
def areSentencesSimilarTwo(self, words1, words2, pairs):
    """
    Decide whether two sentences are similar word by word.

    Similarity is transitive: the pairs are merged in a union-find and two
    differing words match only when both are known and share a root.

    :type words1: List[str]
    :type words2: List[str]
    :type pairs: List[List[str]]
    :rtype: bool
    """
    if len(words1) != len(words2):
        return False

    uf = UnionFind()
    for s1, s2 in pairs:
        uf.union(s1, s2)

    for w1, w2 in zip(words1, words2):
        if w1 == w2:
            continue
        # A word that never appeared in a pair cannot be similar to anything else.
        if w1 not in uf.parents or w2 not in uf.parents:
            return False
        if uf.find(w1) != uf.find(w2):
            return False
    return True
class UnionFind2:
    """
    Adapter around UnionFind that maps every item through ``fn`` before it
    reaches the underlying structure.
    """

    def __init__(self, groups, fn):
        """
        :param groups: iterable of groups, each an iterable of items
        :param fn: callable applied to every item before storing/looking it up
        """
        self.fn = fn
        # Build real lists: on Python 3 map() returns a one-shot iterator,
        # which cannot be indexed or traversed twice by UnionFind.
        self.uf = UnionFind([[fn(item) for item in group] for group in groups])

    def find(self, item):
        """Return the leader of the group containing ``fn(item)``."""
        return self.uf.find(self.fn(item))

    def union(self, item_1, item_2):
        """Merge the groups containing ``fn(item_1)`` and ``fn(item_2)``."""
        self.uf.union(self.fn(item_1), self.fn(item_2))

    def get_clusters(self):
        """Return the values of the underlying ``leader_to_group`` mapping."""
        return self.uf.leader_to_group.values()
def minimum_spanning_tree(self) -> WeightedUndirectedGraph:
    """
    Find the minimum spanning tree (MST) of the graph using Kruskal's algorithm

    :return: the minimum spanning tree as a WeightedUndirectedGraph
    """
    # order the edges by weight with the project's own quick sort
    sorted_edges = list(self.edges.values())
    qsort(sorted_edges, lambda x: x[2])

    vertex_ids = list(self.vertices.keys())
    trees = UnionFind(vertex_ids)
    mst_edges = []
    wanted = len(self.vertices) - 1
    for edge in sorted_edges:
        v0, v1, weight = edge
        if trees.neighbors(v0, v1):
            # endpoints already share a tree; the edge would close a cycle
            continue
        trees.union(v0, v1)
        mst_edges.append(edge)
        if len(mst_edges) == wanted:
            break

    # wrap the selected edges in a WeightedUndirectedGraph
    return WeightedUndirectedGraph.index_edges(
        list(self.vertices.keys()), mst_edges)
def clustering(self, k=4) -> float:
    """
    Perform maximum spacing clustering on the graph, stopping at k clusters

    :param k: number of clusters to stop at
    :return: the cluster spacing, i.e. the weight of the first edge between
        two clusters once exactly k clusters remain; None if the edges run
        out before that happens
    """
    # order the edges by weight with the project's own quick sort
    sorted_edges = list(self.edges.values())
    qsort(sorted_edges, lambda x: x[2])

    clusters = UnionFind(list(self.vertices.keys()))
    for v0, v1, weight in sorted_edges:
        if clusters.neighbors(v0, v1):
            # edges within a cluster are ignored
            continue
        if len(clusters) == k:
            return weight
        clusters.union(v0, v1)
def main(argv):
    """
    Cluster the vertices read from ``argv[0]`` and print the cluster count.

    For each vertex, every mask from ``generate_distances(24, 2)`` and
    ``generate_distances(24, 1)`` is XOR-ed in; if the result is also a
    vertex, the two are merged.
    """
    vertices = construct(argv[0])
    distances = generate_distances(24, 2) + generate_distances(24, 1)
    uf = UnionFind(len(vertices))
    for vertex in vertices:
        for distance in distances:
            candidate = vertex ^ distance
            if candidate not in vertices:
                continue
            here, there = vertices[vertex], vertices[candidate]
            if not uf.connected(here, there):
                uf.union(here, there)
    print('')
    print('%s clusters' % (uf.count()))
    print('')
def karger_min_cut(G, edges):
    """
    Contract the graph down to two vertices and return the crossing edges.

    ``contract`` is invoked ``max(G.keys()) - 2`` times and is expected to
    leave exactly two keys in ``G``. The adjacency entries ``(k, z)`` of one
    remaining vertex are filtered to drop self loops.
    """
    n = max(G.keys())
    cuts = UnionFind(n + 1)
    edges_index = 0
    for _ in xrange(n - 2):
        edges_index = contract(G, edges, cuts, edges_index)
    assert(len(G) == 2)
    u, v = G.keys()

    def crosses_cut(entry):
        # An entry whose far end was merged into u is a self loop.
        k, z = entry
        return not cuts.connected(u, z)

    # Each edge is stored twice, so one vertex's adjacency list is enough.
    return filter(crosses_cut, G[u])
def kruskal(g):
    """
    Return the ``V - 1`` edges ``(src, des)`` of a minimum spanning tree.

    Edges are taken from ``g.edges(data="weight")`` in order of increasing
    weight. An IndexError propagates if the edges run out first.
    """
    V = g.number_of_nodes()
    edges = sorted(g.edges(data="weight"), key=lambda x: x[2])
    uf = UnionFind(V)
    mst = []
    cursor = 0
    while len(mst) < V - 1:
        src, des, _ = edges[cursor]
        cursor += 1
        if uf.find(src) != uf.find(des):
            uf.union(src, des)
            mst.append((src, des))
    return mst
def test0(self):
    """After chaining 0-1, 0-2 and 2-3, element 1 shares a set with 2 and 3."""
    u = UnionFind(3)
    for a, b in ((0, 1), (0, 2), (2, 3)):
        u.union(a, b)
    for other in (2, 3):
        self.assertTrue(u.same_set(1, other))
def kruskal(g):
    """
    Build a minimum spanning tree with a heap of ``[cost, (v1, v2)]`` entries.

    :return: tuple ``(MST, MST_weight)`` where MST is a list of
        ``[v1, v2, cost]`` lists
    """
    # Union-find over the vertices
    U = UnionFind()
    U.initialize(g.V)

    # Heap keyed by cost
    heap = Heap()
    for e in g.E:
        heap.push([e[2], (e[0], e[1])])

    MST = []
    MST_weight = 0
    # Stop once n - 1 edges are chosen or no candidate is left.
    while len(MST) != g.n - 1 and not heap.is_empty():
        popped = heap.pop()
        edge = [popped[1][0], popped[1][1], popped[0]]  # v1, v2, cost
        root_a = U.find(edge[0])
        root_b = U.find(edge[1])
        if root_a == root_b:
            continue
        MST.append(edge)
        MST_weight += edge[2]
        U.union(root_a, root_b)
    return MST, MST_weight
def minMalwareSpread(self, graph, initial):
    """
    Pick the initially infected node whose removal saves the most nodes.

    Removing a node only helps when it is the sole infected node of its
    group; the saving is then that group's size. ``initial`` is sorted in
    place so that ties resolve to the smallest index.

    :type graph: List[List[int]]
    :type initial: List[int]
    :rtype: int
    """
    if not initial:
        return -1
    initial.sort()

    n = len(graph)
    uf = UnionFind(n)
    for i in range(n):
        for j in range(i + 1, n):
            if graph[i][j] == 1:
                uf.union(i, j)

    # group root -> number of initially infected nodes in that group
    infected_per_group = collections.Counter(uf.find(node) for node in initial)
    sole_infectors = [node for node in initial
                      if infected_per_group[uf.find(node)] == 1]
    if not sole_infectors:
        # No removal reduces the damage: fall back to the minimum index.
        return min(initial)
    return max(sole_infectors, key=lambda node: uf.sizes[uf.find(node)])
def min_spanning_tree(g):
    """
    Return the edges of a minimum spanning tree of ``g`` (Kruskal).

    ``g.edges`` is copied before sorting by ``cost``, so the graph's own
    list keeps its order.
    """
    edges_by_cost = g.edges[:]
    edges_by_cost.sort(key=lambda e: e.cost)

    # Initially each node is in its own group.  A real list of lists is
    # built because map() is a one-shot iterator on Python 3.
    initial_groups = [[n] for n in g.nodes]
    uf = UnionFind(initial_groups)

    def edge_creates_cycle(edge):
        return uf.find(edge.node1) == uf.find(edge.node2)

    tree_edges = []
    for edge in edges_by_cost:
        if not edge_creates_cycle(edge):
            tree_edges.append(edge)
            uf.union(edge.node1, edge.node2)
    return tree_edges
def min_spanning_tree(g):
    """
    Return the edges of a minimum spanning tree of ``g`` (Kruskal).

    ``g.edges`` is copied before sorting by ``cost``, so the graph's own
    list keeps its order.
    """
    edges_by_cost = g.edges[:]
    edges_by_cost.sort(key=lambda e: e.cost)

    # Initially each node is in its own group.  A real list of lists is
    # built because map() is a one-shot iterator on Python 3.
    initial_groups = [[n] for n in g.nodes]
    uf = UnionFind(initial_groups)

    def edge_creates_cycle(edge):
        return uf.find(edge.node1) == uf.find(edge.node2)

    tree_edges = []
    for edge in edges_by_cost:
        if not edge_creates_cycle(edge):
            tree_edges.append(edge)
            uf.union(edge.node1, edge.node2)
    return tree_edges
def mst_kruskal(graph):
    '''Select edges with a disjoint-set structure; return (total_cost, mst).'''
    u = UnionFind()
    total_cost = 0
    mst = []
    # Cheapest edges first
    for start, end, weight in sorted(graph.get_edges(), key=lambda x: x[2]):
        if u[start] == u[end]:
            # Same parent on both sides: the edge would close a cycle.
            continue
        u.union(u[start], u[end])
        total_cost += weight
        mst.append((start, end))
    return total_cost, mst
def main(argv):
    """
    Print the maximum spacing of a k-clustering.

    ``argv[0]`` is passed to ``construct_edges`` and ``argv[1]`` is k. Edges
    are ``(cost, u, v)`` entries processed in sorted order; merging continues
    until fewer than k clusters remain, and the cost of the last merge is the
    spacing.

    :raises IndexError: if the edges run out while k or more clusters remain
    """
    vtotal, edges = construct_edges(argv[0])
    edges = sorted(edges)
    uf = UnionFind(vtotal)
    k = int(argv[1])
    max_spacing = 0
    # Walk the sorted list with a cursor: list.pop(0) shifts every remaining
    # element and made the loop quadratic.
    cursor = 0
    while uf.count() >= k:
        edge = edges[cursor]
        cursor += 1
        if not uf.connected(edge[1], edge[2]):
            uf.union(edge[1], edge[2])
            max_spacing = edge[0]
    print('')
    print('For %s clustering: max spacing = %s' % (k, max_spacing))
    print('')
def test_union_find(self):
    """Every element starts as its own parent; union() returns the new parent."""
    elements = [1, 2, 3, 4, 6, 7, 8]
    u1 = UnionFind(elements)
    for e in elements:
        self.assertEqual(e, u1.find(e))

    # (a, b, parent expected from union(a, b))
    for a, b, expected in ((1, 2, 1), (3, 4, 3), (2, 4, 1)):
        self.assertEqual(expected, u1.union(a, b))
    self.assertEqual(1, u1.find(4))

    # Further unions, in either argument order, keep 1 as the parent.
    for a, b in ((3, 4), (3, 8), (8, 3)):
        self.assertEqual(1, u1.union(a, b))
def test_find(self):
    """After Union(five, seven), Find of either set must be ``seven``."""
    uf = UnionFind()
    five, seven = uf.MakeSet(5), uf.MakeSet(7)
    uf.Union(five, seven)
    for member in (five, seven):
        self.assertEqual(uf.Find(member), seven)
def kruskal_mst_improved(self) -> float:
    """
    Finds the minimum spanning tree (MST) using improved Kruskal's MST
    Algorithm.

    :return: float, the summed ``cost`` of the selected edges
    """
    # 1. Edges in order of increasing cost
    edges = sorted(self._edge_list)
    # 2. T starts empty
    curr_spanning_tree = []
    # 3. Union-find over the vertices: each vertex begins as its own
    #    connected component.
    union_find = UnionFind(self._vtx_list)
    # 4. Scan the sorted edges
    for edge in edges:
        leader_v, leader_w = edge.end1.leader, edge.end2.leader
        # Equal leaders mean a v-w path already exists in T, so adding the
        # edge would create a cycle.
        if leader_v is leader_w:
            continue
        curr_spanning_tree.append(edge)
        # Fuse the two components into one.
        union_find.union(leader_v.obj_name, leader_w.obj_name)
    # Original author's analysis: a vertex's leader changes only when its
    # component at least doubles, so each vertex sees O(log n) updates.
    return sum(edge.cost for edge in curr_spanning_tree)
def karger_min_cut(G, edges):
    """
    Contract the graph down to two vertices and return the crossing edges.

    ``contract`` is invoked ``max(G.keys()) - 2`` times and is expected to
    leave exactly two keys in ``G``. The adjacency entries ``(k, z)`` of one
    remaining vertex are filtered to drop self loops.
    """
    n = max(G.keys())
    cuts = UnionFind(n + 1)
    edges_index = 0
    for _ in xrange(n - 2):
        edges_index = contract(G, edges, cuts, edges_index)
    assert (len(G) == 2)
    u, v = G.keys()

    def is_cut_edge(entry):
        # Entries whose far end was merged into u are self loops.
        k, z = entry
        return not cuts.connected(u, z)

    # Each edge is stored twice, so one vertex's adjacency list is enough.
    return filter(is_cut_edge, G[u])
def test_union(self):
    """Uniting members of two groups puts all five elements under leader 1."""
    uf = UnionFind([[1, 2, 3], [4, 5], [6, 7, 8, 9, 0]])
    self.assertEqual(uf.find(2), 1)
    self.assertEqual(uf.find(5), 4)
    uf.union(2, 5)
    for member in (1, 2, 3, 4, 5):
        self.assertEqual(uf.find(member), 1)
def find_best_delta_by_num_ccs_for_given_k(permuted_sim, edges, k):
    """
    Find the edge weights at which the number of components of size >= k peaks.

    Edges are added one at a time in the given order. After each addition
    the roots whose ``uf.weights`` entry is at least k are counted.
    ``permuted_sim`` is not used by this function.

    :return: tuple ``(max_num_ccs, bestDeltas)``
    :raises ValueError: if k < 2
    """
    if k < 2:
        raise ValueError("k must be at least 2")

    # Initially every node is its own component of size 1, so none reaches k.
    max_num_ccs = 0
    bestDeltas = [edges[0].weight]
    uf = UnionFind()
    for edge in edges:
        uf.union(edge.node1, edge.node2)
        num_ccs = sum(1 for root in uf.roots if uf.weights[root] >= k)
        if num_ccs > max_num_ccs:
            max_num_ccs, bestDeltas = num_ccs, [edge.weight]
        elif num_ccs == max_num_ccs:
            bestDeltas.append(edge.weight)
    return max_num_ccs, bestDeltas
class Graph():
    """Simple graph loaded from a file, with a Kruskal-style MST cost."""

    def __init__(self, file_name):
        """Read ``file_name`` and store its edges as a tuple sorted by weight."""
        raw_lines = self.open_file(file_name)
        edge_lines = self.get_edges_size(raw_lines)
        parsed = self.process_line(edge_lines)
        self.sorted_edges = tuple(sorted(parsed, key=lambda edge: edge[2]))

    def open_file(self, file_name):
        """Yield the lines of ``file_name`` one at a time."""
        with open(file_name) as handle:
            for line in handle:
                yield line

    def get_edges_size(self, line_seq):
        """
        Create the union-find from the header, then yield the remaining lines.

        The first field of the first line is the vertex count. One further
        line is consumed and discarded before any line is yielded.
        NOTE(review): confirm the input really has a second non-edge line.
        """
        header = next(line_seq)
        num_verticies = int(header.split()[0])
        next(line_seq)
        self.__union_find = UnionFind(num_verticies)
        for line in line_seq:
            yield line

    def process_line(self, line_seq):
        """Yield each line as an ``(int, int, float)`` tuple."""
        for line in line_seq:
            types = (int, int, float)
            fields = line.split()
            yield tuple(convert(text) for convert, text in zip(types, fields))

    def kruskal_mst(self):
        """
        Return the total weight of the edges that merge two sets.

        Edges are consumed in sorted order until the union-find reports a
        single set.
        """
        total_cost = 0
        edges = iter(self.sorted_edges)
        while self.__union_find.sets > 1:
            edge = next(edges)
            if not self.__union_find.connected(edge[0], edge[1]):
                total_cost += edge[2]
                self.__union_find.union(edge[0], edge[1])
        return total_cost
def get_edges_size(self, line_seq):
    """
    Create the union-find from the header, then yield the remaining lines.

    The first field of the first line is the vertex count. One further line
    is consumed and discarded before any line is yielded.
    NOTE(review): confirm the input really has a second non-edge line.
    """
    header = next(line_seq)
    num_verticies = int(header.split()[0])
    next(line_seq)
    self.__union_find = UnionFind(num_verticies)
    for remaining in line_seq:
        yield remaining
def test_init():
    """Construction from an int or an iterable, plus add(), membership and copy()."""
    uf = UnionFind(3)
    uf.add(('a', 1))
    assert all(i in uf for i in xrange(3))
    assert (('a', 1) in uf)

    uf = UnionFind(letter_data)
    assert all(item in uf for item in letter_data)
    assert (1 not in uf)

    # copy() only has to succeed; its result is discarded.
    uf.copy()
    uf = UnionFind(letter_data)
    assert all(item in uf for item in letter_data)
class Graph():
    """Simple graph loaded from a file, used to run a clustering algorithm."""

    def __init__(self, file_name):
        """Read ``file_name`` and store its edges as a tuple sorted by weight."""
        lines = self.get_edges_size(self.open_file(file_name))
        self.sorted_edges = tuple(
            sorted(self.process_line(lines), key=lambda edge: edge[2]))

    def open_file(self, file_name):
        """Yield the lines of ``file_name`` one at a time."""
        with open(file_name) as myfile:
            for line in myfile:
                yield line

    def get_edges_size(self, line_seq):
        """
        Create the union-find from the header, then yield the remaining lines.

        The first field of the first line is the vertex count.
        """
        num_verticies = int(next(line_seq).split()[0])
        self._union_find = UnionFind(num_verticies)
        for line in line_seq:
            yield line

    def process_line(self, line_seq):
        """Yield each line as an ``(int, int, int)`` tuple."""
        types = (int, int, int)
        for line in line_seq:
            yield tuple(fun(val) for fun, val in zip(types, line.split()))

    def clustering(self, k=4):
        """
        Merge along the cheapest edges until ``k`` sets remain.

        :param k: number of clusters to stop at (defaults to 4)
        :return: the first remaining edge whose endpoints lie in different
            sets; its weight is the spacing of the k-clustering
        :raises StopIteration: if the edges run out first
        """
        edges = iter(self.sorted_edges)
        while self._union_find.sets > k:
            edge = next(edges)
            if not self._union_find.connected(edge[0], edge[1]):
                self._union_find.union(edge[0], edge[1])
        # Always fetch a fresh edge here: when the structure already had k
        # or fewer sets the loop above never ran, and reusing its loop
        # variable would hit an unbound name.
        while True:
            edge = next(edges)
            if not self._union_find.connected(edge[0], edge[1]):
                return edge
def kruskals(g):
    """
    Return the minimum spanning tree of ``g`` as a list of edges.

    Edges are ``(city, city, weight)`` entries; every city in ``g.cities``
    starts as its own tree in the union-find.
    """
    edges = sorted(g.edges, key=lambda e: e[2])
    u = UnionFind()
    for city in g.cities:
        u.makeset(city)

    mst = []
    for e in edges:
        q, v, _ = e
        if u.find(q) == u.find(v):
            continue
        mst.append(e)
        u.union(q, v)
    return mst
def accountsMerge(self, accounts):
    """
    Merge accounts that share at least one email address.

    Each account is ``[name, email1, email2, ...]``. All emails of one
    account are united, then emails are grouped by their union-find root.

    :type accounts: List[List[str]]
    :rtype: List[List[str]]
    """
    from collections import defaultdict

    email_to_id, id_to_name = self.create_mappings(accounts)
    uf = UnionFind(len(email_to_id))

    # Union emails within an account
    for account in accounts:
        p = email_to_id[account[1]]
        for i in range(2, len(account)):
            uf.union(p, email_to_id[account[i]])

    # Collect emails by tree.  The mapping is created here: the original
    # appended to a name that was never defined.  items() replaces the
    # Python 2 only iteritems().
    id_to_emails = defaultdict(list)
    for email, p in email_to_id.items():
        id_to_emails[uf.find(p)].append(email)

    return [[id_to_name[p]] + sorted(emails)
            for p, emails in id_to_emails.items()]
class ClusterGraph():
    """Edge list plus a union-find, used to build a fixed number of clusters."""

    def __init__(self):
        self.edges = []
        self.union_find = UnionFind()

    def add_edge(self, edge):
        """Store ``edge`` and register both endpoints with the union-find."""
        self.edges.append(edge)
        for endpoint in (edge[0], edge[1]):
            self.union_find.add_element(endpoint)

    def clusterfy(self, num_clusters=4):
        """
        Merge along the cheapest edges until ``num_clusters`` clusters remain.

        Processed edges are removed from ``self.edges``.
        """
        self.sort_edges()
        while len(self.union_find.clusters) > num_clusters:
            # The list is sorted descending, so the cheapest edge is last.
            cheapest = self.edges.pop()
            leader1 = self.union_find.find(cheapest[0])
            leader2 = self.union_find.find(cheapest[1])
            if leader1 != leader2:
                self.union_find.union(leader1, leader2)

    def sort_edges(self):
        """Sort ``self.edges`` in place by weight (index 2), heaviest first."""
        self.edges.sort(key=lambda edge: edge[2], reverse=True)

    def get_maximum_spacing(self):
        """Return the smallest weight among remaining edges that cross clusters."""
        find = self.union_find.find
        crossing = [edge for edge in self.edges
                    if find(edge[0]) != find(edge[1])]
        return min(crossing, key=lambda edge: edge[2])[2]
def construct_graph(seq_dict, match_dict, threshold=90):
    """
    Group sequences into components linked by sufficiently good matches.

    A match unites its query and reference sequence when either global
    identity exceeds ``threshold``. Each sequence is then labelled with its
    component and added to the matching Component.

    :return: tuple ``(uf, component_dict)``; component_dict maps component
        label -> Component
    """
    uf = UnionFind(seq_dict)
    component_dict = {}
    for match in match_dict.values():
        q_seq = seq_dict[match.q_name]
        r_seq = seq_dict[match.r_name]
        if match.q_global_identity > threshold or match.r_global_identity > threshold:
            uf.union(q_seq.name, r_seq.name)
    uf.rename_component()

    for seq_name in seq_dict.keys():
        seq = seq_dict[seq_name]
        component_label = uf.component_label[seq_name]
        # Looked up but not otherwise used.
        component_size = uf.component_size[component_label]
        seq.label['component'] = component_label
        fresh = Component(component_label)
        fresh.add_member(seq)
        if component_label not in component_dict:
            component_dict[component_label] = fresh
        else:
            component_dict[component_label].add_member(seq)
    return uf, component_dict
def numIslands2(self, m, n, positions):
    """
    Return the island count after each land cell is added to an m x n grid.

    :type m: int
    :type n: int
    :type positions: List[List[int]]
    :rtype: List[int]
    """
    uf = UnionFind(m * n)
    added = set()
    count = 0  # current number of islands
    res = []
    for i, j in positions:
        if (i, j) in added:
            # A repeated position adds no land; counting it again would
            # inflate the island total.
            res.append(count)
            continue
        p = i * n + j
        added.add((i, j))
        count += 1
        for x, y in [(i - 1, j), (i, j - 1), (i + 1, j), (i, j + 1)]:
            if 0 <= x < m and 0 <= y < n and (x, y) in added:
                q = x * n + y
                # Two adjacent land cells in different trees merge into one
                # island.
                if uf.find(p) != uf.find(q):
                    count -= 1
                    uf.union(p, q)
        res.append(count)
    return res
def minSpanningTree(nodes, edges):
    '''
    Input: A set of nodes and a list of edges, where each edge is a tuple
           (u, v) of distinct nodes in "nodes". The list is shuffled in place.
    Output: The edges accepted by the union-find, each returned as
            (u, v, 1), i.e. every edge carries weight 1.
    '''
    shuffle(edges)  # randomizes the order in which edges are tried
    uf = UnionFind(nodes)
    return [(e[0], e[1], 1) for e in edges if uf.union(e[0], e[1])]
def find_k(input_list):
    """
    Cluster the numbers in ``input_list`` and return ``clusters.size``.

    Every number is united with each of its generated neighbours that is
    present in ``numbers``.
    NOTE(review): ``numbers`` is not defined in this function; presumably a
    module-level collection of the input values -- confirm.
    """
    clusters = UnionFind()
    for number in input_list:
        if not clusters.in_union(number):
            clusters.add(number)
        for neighbor in generate_neighbors(number):
            if neighbor not in numbers:
                continue
            clusters.union(number, neighbor)
    return clusters.size
def min_tree(self):
    """
    Return a WeightedGraph holding a minimum spanning tree of this graph.

    ``self.vertexes`` maps vertex -> {neighbour: weight}. Candidate edges
    are tried in sorted ``(weight, (vertex, neighbour))`` order.
    """
    a = WeightedGraph()
    union_obj = UnionFind()
    edges = []
    for vertex in self.vertexes.keys():
        union_obj.creation(vertex)
        a.add_vertex(vertex)
        adjacent = self.vertexes[vertex]
        edges.extend((adjacent[v], (vertex, v)) for v in adjacent)

    for _, (ver, vertex) in sorted(edges):
        if union_obj.search(ver) == union_obj.search(vertex):
            continue
        a.add_direct_link(ver, vertex, self.vertexes[ver][vertex])
        union_obj.union_sets(ver, vertex)
    return a
def test_find(self):
    """find() returns the first element of the group each member was created in."""
    uf = UnionFind([[1, 2, 3], [4, 5], [6, 7, 8, 9, 0]])
    expected_leader = (
        (1, 1), (2, 1), (3, 1),
        (4, 4), (5, 4),
        (6, 6), (7, 6), (8, 6), (9, 6), (0, 6),
    )
    for member, leader in expected_leader:
        self.assertEqual(uf.find(member), leader)
def test_init():
    """Construction from an int or an iterable, plus add(), membership and copy()."""
    uf = UnionFind(3)
    uf.add(('a', 1))
    assert all(i in uf for i in xrange(3))
    assert(('a', 1) in uf)

    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
    assert(1 not in uf)

    # copy() only has to succeed; its result is discarded.
    uf.copy()
    uf = UnionFind(letter_data)
    assert all(letter in uf for letter in letter_data)
def kruskal(nodes:List(Int), edges:List(Tuple(Int, Int, Int)), edges_to_check:List(Tuple(Int, Int)))\
        ->List(Tuple(Int, Int, Int)):
    """
    Run Kruskal's algorithm and return the accepted edges that are listed,
    in either orientation, in ``edges_to_check``.

    Edges are ``(n1, n2, weight)`` and are processed by increasing weight.
    """
    sets = UnionFind({})
    for n in nodes:
        sets.add_node(n)

    mst = []
    for e in sorted(edges, key=itemgetter(2)):
        l1 = sets.find(e[0])
        l2 = sets.find(e[1])
        if l1 == l2:
            continue
        (e1, e2, w) = e
        # Only edges of interest are recorded, but every accepted edge
        # still merges its two sets.
        if (e1, e2) in edges_to_check or (e2, e1) in edges_to_check:
            mst.append(e)
        sets.union(l1, l2)
    return mst
def __cluster(self):
    """
    Merge along sorted edges down to ``self.k`` components and store the
    smallest weight of a remaining cross-cluster edge in ``self.spacing``.

    ``self.spacing`` stays at 1e100 when no such edge exists.
    """
    uf = UnionFind(self.graph.V())
    min_spacing = 1e100
    for u, v, w in self.__sorted_edges():
        if uf.connected(u, v):
            continue
        if uf.count_components() > self.k:
            uf.union(u, v)
        elif min_spacing > w:
            # Once k clusters are reached, every cross-cluster edge is a
            # candidate for the spacing.
            min_spacing = w
    self.spacing = min_spacing