def boruvka_mst(G):
    """Find a minimum spanning tree using Boruvka's algorithm.

    Params
    --------
    G: NetworkX Graph
        An input weighted graph to find MST.  An edge without a ``'weight'``
        attribute is treated as having weight 1.

    Returns
    --------
    T: NetworkX Graph
        A graph on the nodes of ``G`` holding the selected edges, each with a
        ``'weight'`` attribute.  If ``G`` is disconnected the result is a
        spanning forest, because components with an empty edge boundary are
        never joined.
    """
    T = nx.Graph()
    T.add_nodes_from(G.nodes)
    # Start from the discrete partition: every node is its own component.
    forest = UnionFind(G)

    def find_edge(comp):
        """Return the lightest edge leaving ``comp`` or None if there is none."""
        border = None
        minw = float('inf')
        for e in nx.edge_boundary(G, comp, data=True):
            w = e[-1].get('weight', 1)
            if w < minw:
                minw = w
                border = e
        return border

    while True:
        # The candidates must be materialised before any union is performed,
        # otherwise the components would change while they are being scanned.
        candidates = [find_edge(comp) for comp in forest.to_sets()]
        min_edges = [edge for edge in candidates if edge is not None]
        if not min_edges:
            # Every remaining component has an empty boundary: finished.
            break
        for u, v, attrs in min_edges:
            # The same edge may be proposed by both of its endpoints'
            # components; the root check lets only the first one through.
            if forest[u] != forest[v]:
                # Use the same default as find_edge so that an unweighted
                # edge does not raise KeyError here.
                T.add_edge(u, v, weight=attrs.get('weight', 1))
                forest.union(u, v)
    return T
def calculate_hamming_clusters(numbers, n_bits=24):
    """Count the clusters formed by linking numbers at Hamming distance <= 2.

    Two distinct values are put in the same cluster when their binary
    representations differ in one or two of the lowest ``n_bits`` bit
    positions; equal values always share a cluster.

    Parameters
    ----------
    numbers : iterable of int
        The node labels, possibly containing duplicates.
    n_bits : int (default: 24)
        Number of bit positions considered when flipping bits.

    Returns
    -------
    int
        The number of clusters after all merges.
    """
    # Only the distinct values matter; dict.fromkeys keeps first-seen order
    # and, unlike a defaultdict, is not mutated by membership tests below.
    distinct = dict.fromkeys(numbers)
    union_find = UnionFind(distinct)

    one_bit_masks = [1 << i for i in range(n_bits)]
    two_bit_masks = [
        (1 << i) ^ (1 << j)
        for i, j in itertools.combinations(range(n_bits), 2)
    ]

    for distance_mask in one_bit_masks + two_bit_masks:
        for key in distinct:
            neighbour = key ^ distance_mask
            # Plain membership test: never inserts the probed neighbour.
            if neighbour in distinct:
                union_find.union(key, neighbour)

    return sum(1 for _ in union_find.to_sets())
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True, ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        An edge without this attribute is given weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        `(u, v, d)` or `(u, v)` for every edge joined into the forest.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is False.
    """
    # Negating the weights turns the maximum problem into a minimum one.
    sign = 1 if minimum else -1

    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Return the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.
        """
        boundary = None
        minwt = float('inf')
        for e in nx.edge_boundary(G, component, data=True):
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                raise ValueError(f"NaN found as an edge weight. Edge {e}")
            # ``boundary is None`` lets an edge whose signed weight is
            # infinite still be chosen when it is the only way out of the
            # component; a bare ``wt < inf`` test would skip it forever.
            if boundary is None or wt < minwt:
                minwt = wt
                boundary = e
        return boundary

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator, because the forest is
        # modified below. In this list, the same edge may appear twice, in
        # different orientations (but that's okay, since a union operation
        # will be called on the endpoints the first time it is seen, but
        # not the second time).
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [best_edge(component) for component in forest.to_sets()]
        # ``None`` means the component's boundary was empty, so that part
        # of the forest has been completed.
        best_edges = [edge for edge in best_edges if edge is not None]
        if not best_edges:
            # No component can grow any further: the forest is complete.
            break
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True):
    """Iterates over the edges of a minimum spanning tree as computed by
    Borůvka's algorithm.

    `G` is a NetworkX graph. Also, the edges must have distinct weights,
    otherwise the edges may not form a tree.

    If `minimum` is True the lightest boundary edge of each component is
    selected; otherwise the heaviest one is.

    `weight` is the edge attribute that stores the edge weights. Each
    edge in the graph must have such an attribute, otherwise a
    :exc:`KeyError` will be raised.

    If `data` is True, this iterator yields edges of the form
    ``(u, v, d)``, where ``u`` and ``v`` are nodes and ``d`` is the edge
    attribute dictionary. Otherwise, it yields edges of the form
    ``(u, v)``.

    The `keys` argument is ignored, since this function is not
    implemented for multigraphs; it exists only for consistency with the
    other minimum spanning tree functions.

    """
    opt = min if minimum else max
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # ``default=None`` covers the empty boundary, so the boundary can
        # be consumed lazily instead of being copied into a list first.
        boundary = nx.edge_boundary(G, component, data=True)
        return opt(boundary, key=lambda e: e[-1][weight], default=None)

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator, because the forest is
        # modified below. In this list, the same edge may appear twice, in
        # different orientations (but that's okay, since a union operation
        # will be called on the endpoints the first time it is seen, but
        # not the second time).
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [best_edge(component) for component in forest.to_sets()]
        # ``None`` means the component's boundary was empty, so that part
        # of the forest has been completed.
        best_edges = [edge for edge in best_edges if edge is not None]
        if not best_edges:
            # No component can grow any further: the forest is complete.
            break
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
if __name__ == "__main__":
    # Count the clusters obtained by merging every pair of nodes whose bit
    # labels are at Hamming distance 0, 1 or 2.
    filename = "clustering_big.txt"
    with open(filename, "r") as f:
        header, *rows = f.read().splitlines()

    # Header line: "<number of nodes> <bits per node>".
    n_nodes, n_bits = map(int, header.split())
    print(f'{n_nodes} nodes')
    print(f'{n_bits} bits per node')

    # Each data row is a space-separated bit string; blank rows (e.g. extra
    # trailing newlines) are skipped instead of crashing int('', 2).
    numbers = [int(''.join(row.split()), 2) for row in rows if row.strip()]

    # Map every distinct label to the indices of the nodes carrying it.
    nodes = {}
    for node, num in enumerate(numbers):
        nodes.setdefault(num, []).append(node)

    uf = UnionFind(range(n_nodes))

    # Distance 0: nodes sharing a label belong together.  One variadic union
    # per label replaces the quadratic all-pairs merging.
    for members in nodes.values():
        uf.union(*members)

    # XOR masks that flip exactly one or exactly two bits.
    distances = [1 << i for i in range(n_bits)]
    distances += [(1 << ix_1) ^ (1 << ix_2)
                  for (ix_1, ix_2) in itertools.combinations(range(n_bits), 2)]

    for distance in distances:
        for number, members in nodes.items():
            neighbours = nodes.get(number ^ distance)
            if neighbours is not None:
                # Each label group is already a single set, so linking one
                # representative from each side is sufficient.
                uf.union(members[0], neighbours[0])

    print(len(list(uf.to_sets())))  # 6118
def kruskal_mst_edges(G, weight='weight', data=True):
    """Generate edges of a spanning structure of an undirected weighted graph.

    Despite the name this is not plain Kruskal: after every accepted edge
    the working copy of the edge list is partly re-weighted by the increase
    in ``average_pairwise_distance`` divided by the number of newly reached
    vertices, and then re-sorted.
    NOTE(review): presumably a heuristic for a pairwise-distance objective
    -- confirm with the author.
    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against the original file.

    Parameters
    ----------
    G : NetworkX Graph

    weight : string
        Edge data key to use for weight (default 'weight').
        NOTE(review): not read by the body, which hard-codes ``"weight"``.

    data : bool, optional
        If True yield the edge data along with the edge.
        NOTE(review): not read by the body; data is always yielded.

    Returns
    -------
    edges : iterator
        A generator that produces three-tuples ``(u, v, d)`` where ``d`` is
        the third element of the matching entry of ``G.edges(data=True)``,
        i.e. the original edge data rather than the re-computed weight.
    """
    subtrees = UnionFind()
    edges = sorted(
        G.edges(data=True),
        key=lambda t: t[2].get("weight"))  # sorted by edge weights first
    # (u, v) pairs in the same order as ``edges``; used for index lookups.
    edges_no_weights = [e[0:2] for e in edges]  # same order as edges
    # Working copy whose weights are rewritten and which is re-sorted below.
    edges_copy = edges.copy()
    # NOTE(review): assigned but never read afterwards.
    available_edges = len(G.nodes) - 1
    dominated = set()  # set of dominated vertices
    num_edges_mst = 0
    while num_edges_mst < len(G.nodes):
        i = 0
        u, v, d = edges_copy[0]
        # Walk down the working list while the candidate has both endpoints
        # dominated and already lies inside a single subtree.
        while (u in dominated and v in dominated and i + 1 < len(edges_copy)
               and subtrees[u] == subtrees[v]):
            i += 1
            u, v, d = edges_copy[i]
        if subtrees[u] != subtrees[v]:
            dominated.add(u)
            dominated.add(v)
            new_v_reached = 0
            # NOTE(review): u_subtree and v_subtree are never used.
            u_subtree = []
            v_subtree = []
            # for loop to find the number of new vertices reached:
            for x, y, w in edges_copy:
                if x in set(G.__getitem__(u)) or y in set(
                        G.__getitem__(v)):  # neighbors of u and v
                    if x not in dominated:
                        new_v_reached += 1
                    if y not in dominated:
                        new_v_reached += 1
            # all parts of current mst
            subtrees.union(u, v)
            # Vertex list of the subtree that now contains u.
            curr_mst_vertices = [list(s) for s in subtrees.to_sets() if u in s]
            curr_mst_vertices = curr_mst_vertices[0]
            # Graph induced by G on those vertices (every G edge with both
            # endpoints in the list is added, not only accepted edges).
            current_tree = nx.Graph()
            for v1, v2, v3 in G.edges.data():
                if v1 in curr_mst_vertices and v2 in curr_mst_vertices:
                    # NOTE(review): edges_ is never used.
                    edges_ = edges[edges_no_weights.index((v1, v2))]
                    current_tree.add_edge(v1, v2, weight=v3['weight'])
            before = average_pairwise_distance(current_tree)
            # add one edge to find increase in cost
            for v1, v2, v3 in edges_copy:
                # print(v1,v2,v3)
                new_tree = current_tree.copy()
                # edge (u, X) (v, X) (X, u) (X, v)
                # NOTE(review): the second clause tests ``v2 != v`` where the
                # pattern of the other clauses suggests ``v2 != u`` -- confirm.
                if (v1 == u and v2 != v) or (v1 == v and v2 != v) or (
                        v2 == u and v1 != v) or (v2 == v and v1 != u):
                    new_tree.add_edge(v1, v2, weight=v3['weight'])
                    after = average_pairwise_distance(new_tree)
                    # NOTE(review): edges_no_weights follows the order of
                    # ``edges``; once edges_copy has been re-sorted this index
                    # may refer to a different edge of edges_copy.
                    edge_update = edges_copy[edges_no_weights.index((v1, v2))]
                    edge_update_list = list(edge_update[0:2])
                    if after - before > 0 and new_v_reached != 0:
                        # New weight: cost increase per newly reached vertex.
                        edge_update_list.append(
                            {'weight': ((after - before) / new_v_reached)})
                        edges_copy[edges_no_weights.index(
                            (v1, v2))] = edge_update_list  # update edges_copy
            edges_copy = sorted(edges_copy, key=lambda x: x[2]['weight'])
            # available_edges -= 1
            num_edges_mst += 1
            yield (u, v, edges[edges_no_weights.index((u, v))][2])
        # Stop once the yielded edge count reaches len(G.nodes) - 1.
        if num_edges_mst == len(G.nodes) - 1:
            break
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True):
    """Iterates over the edges of a minimum spanning tree as computed by
    Borůvka's algorithm.

    `G` is a NetworkX graph. Also, the edges must have distinct weights,
    otherwise the edges may not form a tree.

    If `minimum` is True the lightest boundary edge of each component is
    selected; otherwise the heaviest one is.

    `weight` is the edge attribute that stores the edge weights. Each
    edge in the graph must have such an attribute, otherwise a
    :exc:`KeyError` will be raised.

    If `data` is True, this iterator yields edges of the form
    ``(u, v, d)``, where ``u`` and ``v`` are nodes and ``d`` is the edge
    attribute dictionary. Otherwise, it yields edges of the form
    ``(u, v)``.

    The `keys` argument is ignored, since this function is not
    implemented for multigraphs; it exists only for consistency with the
    other minimum spanning tree functions.

    """
    opt = min if minimum else max
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # ``default=None`` covers the empty boundary, so the boundary can
        # be consumed lazily instead of being copied into a list first.
        boundary = nx.edge_boundary(G, component, data=True)
        return opt(boundary, key=lambda e: e[-1][weight], default=None)

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator, because the forest is
        # modified below. In this list, the same edge may appear twice, in
        # different orientations (but that's okay, since a union operation
        # will be called on the endpoints the first time it is seen, but
        # not the second time).
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [best_edge(component) for component in forest.to_sets()]
        # ``None`` means the component's boundary was empty, so that part
        # of the forest has been completed.
        best_edges = [edge for edge in best_edges if edge is not None]
        if not best_edges:
            # No component can grow any further: the forest is complete.
            break
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True, ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        An edge without this attribute is given weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        `(u, v, d)` or `(u, v)` for every edge joined into the forest.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is False.
    """
    # Negating the weights turns the maximum problem into a minimum one.
    sign = 1 if minimum else -1

    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Return the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.
        """
        boundary = None
        minwt = float('inf')
        for e in nx.edge_boundary(G, component, data=True):
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                raise ValueError(f"NaN found as an edge weight. Edge {e}")
            # ``boundary is None`` lets an edge whose signed weight is
            # infinite still be chosen when it is the only way out of the
            # component; a bare ``wt < inf`` test would skip it forever.
            if boundary is None or wt < minwt:
                minwt = wt
                boundary = e
        return boundary

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator, because the forest is
        # modified below. In this list, the same edge may appear twice, in
        # different orientations (but that's okay, since a union operation
        # will be called on the endpoints the first time it is seen, but
        # not the second time).
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [best_edge(component) for component in forest.to_sets()]
        # ``None`` means the component's boundary was empty, so that part
        # of the forest has been completed.
        best_edges = [edge for edge in best_edges if edge is not None]
        if not best_edges:
            # No component can grow any further: the forest is complete.
            break
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
import networkx as nx
from networkx.utils import UnionFind

# --- Union-find ---
a, b = 1, 2
uf = UnionFind()
uf.union(a, b)  # merge the sets containing a and b
print(uf[a] == uf[b])  # same-set test (uf[a] returns the root of a)
for group in uf.to_sets():  # iterate over every group
    pass
# The size of a set is kept on its root only, so look the root up first;
# uf.weights[a] is the set size only when a happens to be the root.
ap = uf.weights[uf[a]]  # size of the set that a belongs to
# https://qiita.com/kzm4269/items/081ff2fdb8a6b0a6112f
# https://docs.pyq.jp/python/math_opt/graph.html

# --- Maximum flow / minimum cut ---
g = nx.DiGraph()
g.add_edges_from([(0, 3, {'capacity': 10}), (1, 2, {'capacity': 15})])
g.add_edge(1, 3, capacity=20)
nx.maximum_flow(g, 1, 3)
# Output recorded by the original author:
# (20, {0: {3: 0}, 3: {0: 0, 1: 0}, 1: {2: 0, 3: 20}, 2: {1: 0}})
nx.minimum_cut(g, 1, 3)
# Output recorded by the original author:
# (20, ({1, 2}, {0, 3}))

# --- Minimum-cost flow ---
# A negative demand marks a supply node, a positive demand a sink.
G = nx.DiGraph()
G.add_node("a", demand=-5)
G.add_node("d", demand=5)
G.add_edge("a", "b", weight=3, capacity=4)
G.add_edge("a", "c", weight=6, capacity=10)
G.add_edge("b", "d", weight=1, capacity=9)
# Group the node indices by their label value.
for position, label in enumerate(theArray):
    n_a[label].append(position)

# Union-find structure over the node indices 0 .. node-1.
Structure = UnionFind(range(node))

# Hamming distance 0: the label itself.
bit_mask0 = [0]
# Hamming distance 1: one set bit at each of the n_bits positions.
bit_mask1 = [1 << shift for shift in range(n_bits)]
# Hamming distance 2: one mask per unordered pair of bit positions.
bit_mask2 = [
    xor(1 << pair[0], 1 << pair[1])
    for pair in combinations(range(n_bits), 2)
]
bit_mask = bit_mask0 + bit_mask1 + bit_mask2

for mask in bit_mask:
    for label in n_a:
        partner = xor(label, mask)
        if partner == label:
            # Zero mask: merge all nodes that share this label.
            Structure.union(*n_a[label])
        elif partner in n_a:
            # A label one or two bit flips away exists: merge both groups.
            Structure.union(*n_a[label], *n_a[partner])

# Number of clusters that remain.
print(len(list(Structure.to_sets())))