示例#1
0
def boruvka_mst(G):
    """Finds MST using Boruvka's algorithm

    Params
    --------
    G: NetworkX Graph
        An input weighted graph to find MST. An edge without a
        ``'weight'`` attribute is treated as having weight 1.

    Returns
    --------
    T: NetworkX Graph
        A minimum spanning tree (a spanning forest when `G` is
        disconnected). It contains every node of `G`; each selected edge
        carries a single ``'weight'`` attribute.
    """
    T = nx.Graph()
    T.add_nodes_from(G.nodes)

    # Start from the discrete partition: every node is its own component.
    forest = UnionFind(G)

    def edge_weight(edge):
        """Weight of a ``(u, v, attrs)`` edge, defaulting to 1."""
        return edge[-1].get('weight', 1)

    def find_edge(comp):
        """Finds the minimum edge for the given connected component.

        Returns ``None`` when no edge leaves the component.
        """
        boundary = nx.edge_boundary(G, comp, data=True)
        # ``min`` keeps the first of several equally light edges and, unlike
        # a strict "< inf" comparison, still picks an edge whose weight is
        # infinite when nothing lighter leaves the component.
        return min(boundary, key=edge_weight, default=None)

    while True:
        # Materialise the candidates before touching the forest: the
        # components must not change while their boundaries are scanned.
        min_edges = [
            edge
            for edge in map(find_edge, forest.to_sets())
            if edge is not None
        ]
        if not min_edges:
            # No component has an outgoing edge left: the forest is final.
            break

        for u, v, attrs in min_edges:
            # The same edge can be proposed by both of its components; the
            # second proposal is skipped because they are already merged.
            if forest[u] != forest[v]:
                # Use the same default as the selection step so that an
                # unweighted edge does not raise KeyError here.
                T.add_edge(u, v, weight=edge_weight((u, v, attrs)))
                forest.union(u, v)

    return T
示例#2
0
def calculate_hamming_clusters(numbers, n_bits=24):
    """Count the clusters formed by merging values within Hamming distance 2.

    Two values end up in the same cluster when they are linked by a chain of
    values in which each consecutive pair differs in at most two bits.
    Duplicate values count once.

    Parameters
    ----------
    numbers : iterable of int
        Bit patterns encoded as non-negative integers.
    n_bits : int (default: 24)
        Number of low-order bit positions that may differ between values.

    Returns
    -------
    int
        Number of clusters.
    """
    # Only membership matters, so keep the distinct values in a plain set.
    # (Probing a defaultdict with ``map[key]`` would insert an empty entry
    # for every one of the ~n_bits**2 / 2 neighbours tried per value.)
    distinct = set(numbers)

    # no duplicates in the union find
    union_find = UnionFind(distinct)

    # XOR masks flipping exactly one bit, then exactly two bits.
    masks = [1 << i for i in range(n_bits)]
    masks += [
        (1 << i) ^ (1 << j)
        for i, j in itertools.combinations(range(n_bits), 2)
    ]

    for mask in masks:
        for value in distinct:
            neighbour = value ^ mask
            if neighbour in distinct:
                union_find.union(value, neighbour)

    return sum(1 for _ in union_find.to_sets())
示例#3
0
def boruvka_mst_edges(G,
                      minimum=True,
                      weight='weight',
                      keys=False,
                      data=True,
                      ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        An edge without this attribute is given weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        The spanning tree (forest) edges, shaped according to `data`.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is False.

    """
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)
    # Negating the weights turns the maximum problem into a minimum one.
    sign = 1 if minimum else -1

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        best = None
        best_wt = None
        for e in nx.edge_boundary(G, component, data=True):
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (e, ))
            # Accept the first usable edge unconditionally: seeding the
            # search with +inf and comparing strictly would never select an
            # edge whose (signed) weight is itself infinite, and the
            # component would wrongly be reported as finished.
            if best is None or wt < best_wt:
                best_wt = wt
                best = e
        return best

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator: the forest is modified
        # below and must stay fixed while the boundaries are scanned. The
        # same edge may appear twice, in different orientations; that is
        # fine because the endpoints are merged the first time it is seen
        # and the second occurrence is then skipped.
        best_edges = [
            edge
            for edge in map(best_edge, forest.to_sets())
            if edge is not None
        ]
        # An empty list means no component has a boundary edge left
        # (the graph is spanned, or it is disconnected), so we are done.
        if not best_edges:
            return
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
示例#4
0
文件: mst.py 项目: jklaise/networkx
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False, data=True):
    """Iterates over the edges of a minimum (or, if `minimum` is false,
    maximum) spanning tree as computed by Borůvka's algorithm.

    `G` is a NetworkX graph. Also, the edges must have distinct weights,
    otherwise the edges may not form a tree.

    `weight` is the edge attribute that stores the edge weights. Each
    edge in the graph must have such an attribute, otherwise a
    :exc:`KeyError` will be raised.

    If `data` is True, this iterator yields edges of the form
    ``(u, v, d)``, where ``u`` and ``v`` are nodes and ``d`` is the edge
    attribute dictionary. Otherwise, it yields edges of the form
    ``(u, v)``.

    The `keys` argument is ignored, since this function is not
    implemented for multigraphs; it exists only for consistency with the
    other minimum spanning tree functions.

    """
    opt = min if minimum else max
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def edge_weight(edge):
        """Returns the weight stored in the attribute dict of an edge."""
        return edge[-1][weight]

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # ``default=None`` covers the empty boundary without first copying
        # the boundary into a list.
        boundary = nx.edge_boundary(G, component, data=True)
        return opt(boundary, key=edge_weight, default=None)

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator: the forest is modified
        # below and must stay fixed while the boundaries are scanned. The
        # same edge may appear twice, in different orientations; that is
        # fine because the endpoints are merged the first time it is seen
        # and the second occurrence is then skipped.
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [
            edge
            for edge in map(best_edge, forest.to_sets())
            if edge is not None
        ]
        # An empty list means every component has an empty boundary
        # (the graph is spanned, or it is disconnected), so we are done.
        if not best_edges:
            return
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
示例#5
0
if __name__ == "__main__":
    # Input format: a header line "<node count> <bits per node>" followed by
    # one node per line, written as whitespace-separated bits.
    filename = "clustering_big.txt"
    with open(filename, "r") as f:
        lines = f.readlines()

    n_nodes, n_bits = map(int, lines[0].split())
    print(f'{n_nodes} nodes')
    print(f'{n_bits} bits per node')

    # Skip blank lines (e.g. a trailing newline), which int('', 2) rejects.
    numbers = [
        int(''.join(line.split()), 2) for line in lines[1:] if line.strip()
    ]

    # Map each distinct bit pattern to the set of node indices carrying it.
    nodes = {}
    for node, num in enumerate(numbers):
        nodes.setdefault(num, set()).add(node)

    uf = UnionFind(range(n_nodes))

    # Distance 0: nodes with an identical bit pattern form one cluster.
    # A single variadic union per group replaces the all-pairs loop.
    for members in nodes.values():
        uf.union(*members)

    # XOR masks flipping exactly one bit, then exactly two bits.
    distances = [1 << i for i in range(n_bits)]
    distances += [
        (1 << ix_1) ^ (1 << ix_2)
        for (ix_1, ix_2) in itertools.combinations(range(n_bits), 2)
    ]

    # Every group is already merged internally, so linking one
    # representative of each group joins the two groups completely.
    for number, members in nodes.items():
        node_from = next(iter(members))
        for distance in distances:
            neighbours = nodes.get(number ^ distance)
            if neighbours is not None:
                uf.union(node_from, next(iter(neighbours)))

    print(len(list(uf.to_sets())))  # 6118
示例#6
0
def kruskal_mst_edges(G, weight='weight', data=True):
    """Greedily generate spanning-tree edges of an undirected weighted
    graph.

    Despite the name this is not plain Kruskal: candidate edges start out
    sorted by their ``"weight"`` attribute, but after every accepted edge
    the candidates touching its endpoints are re-scored from the change in
    ``average_pairwise_distance`` and the candidate list is re-sorted.

    Parameters
    ----------
    G : NetworkX Graph
    weight : string
       Edge data key to use for weight (default 'weight').
       NOTE(review): never read below -- the literal key ``"weight"`` is
       used everywhere; confirm whether that is intended.
    data : bool, optional
       NOTE(review): never read below -- the edge data is always yielded.

    Returns
    -------
    edges : iterator
       A generator that produces at most ``len(G.nodes) - 1`` edges as
       three-tuples (u, v, d), where d is the attribute dict of that edge
       taken from the initially sorted edge list.
    """
    subtrees = UnionFind()
    # Original edges in ascending "weight" order; this list is never
    # modified and is the source of the attribute dicts that are yielded.
    edges = sorted(
        G.edges(data=True),
        key=lambda t: t[2].get("weight"))  # sorted by edge weights first
    # (u, v) pairs in the same order as ``edges``; used as a lookup table
    # from an endpoint pair to a position. It is never re-ordered.
    edges_no_weights = [e[0:2] for e in edges]  # same order as edges
    # Working candidate list: entries get replaced by re-scored copies and
    # the list is re-sorted after every accepted edge.
    edges_copy = edges.copy()
    available_edges = len(G.nodes) - 1  # unused below
    dominated = set()  # endpoints of the edges accepted so far
    num_edges_mst = 0

    while num_edges_mst < len(G.nodes):
        # Scan the candidates in order, skipping edges whose endpoints are
        # both already dominated and already in the same subtree.
        i = 0
        u, v, d = edges_copy[0]
        while (u in dominated and v in dominated and i + 1 < len(edges_copy)
               and subtrees[u] == subtrees[v]):
            i += 1
            u, v, d = edges_copy[i]
        # NOTE(review): if the scan stops on an edge whose endpoints are
        # already joined, nothing below runs and the outer loop repeats with
        # unchanged state -- looks like a possible infinite loop (e.g. for a
        # disconnected graph); confirm.
        if subtrees[u] != subtrees[v]:
            dominated.add(u)
            dominated.add(v)
            new_v_reached = 0
            u_subtree = []  # unused below
            v_subtree = []  # unused below

            # Count not-yet-dominated endpoints over the candidate edges
            # whose first endpoint neighbours u or whose second endpoint
            # neighbours v. A vertex is counted once per matching edge, so
            # the same vertex can be counted several times.
            for x, y, w in edges_copy:
                if x in set(G.__getitem__(u)) or y in set(
                        G.__getitem__(v)):  # neighbors of u and v
                    if x not in dominated:
                        new_v_reached += 1
                    if y not in dominated:
                        new_v_reached += 1

            # Merge the two subtrees, then collect the vertices of the
            # merged component that contains u.
            subtrees.union(u, v)
            curr_mst_vertices = [list(s) for s in subtrees.to_sets() if u in s]
            curr_mst_vertices = curr_mst_vertices[0]
            # Build a graph from every edge of G with both endpoints in that
            # component (i.e. not only the edges accepted so far).
            current_tree = nx.Graph()
            for v1, v2, v3 in G.edges.data():
                if v1 in curr_mst_vertices and v2 in curr_mst_vertices:
                    # The looked-up value is not used afterwards.
                    edges_ = edges[edges_no_weights.index((v1, v2))]
                    current_tree.add_edge(v1, v2, weight=v3['weight'])
            # Baseline cost; average_pairwise_distance is defined elsewhere.
            before = average_pairwise_distance(current_tree)

            # Re-score candidates touching u or v by how much adding each
            # one alone would raise the cost.
            for v1, v2, v3 in edges_copy:
                new_tree = current_tree.copy()
                # Intended shapes: edge (u, X) (v, X) (X, u) (X, v).
                # NOTE(review): the second clause tests ``v2 != v`` where
                # the pattern of the other three suggests ``v2 != u`` --
                # possibly a typo; confirm.
                if (v1 == u and v2 != v) or (v1 == v and v2 != v) or (
                        v2 == u and v1 != v) or (v2 == v and v1 != u):
                    new_tree.add_edge(v1, v2, weight=v3['weight'])
                    after = average_pairwise_distance(new_tree)
                    # NOTE(review): the index comes from edges_no_weights
                    # (initial order) but is applied to edges_copy, which is
                    # re-sorted at the end of every round -- after the first
                    # round the position may belong to a different edge;
                    # confirm.
                    edge_update = edges_copy[edges_no_weights.index((v1, v2))]
                    edge_update_list = list(edge_update[0:2])
                    # Only a strict cost increase with at least one newly
                    # reached vertex produces a new score: the increase per
                    # newly reached vertex.
                    if after - before > 0 and new_v_reached != 0:
                        edge_update_list.append(
                            {'weight': ((after - before) / new_v_reached)})
                        edges_copy[edges_no_weights.index(
                            (v1, v2))] = edge_update_list
            # Re-sort the candidates by their (possibly re-scored) weight.
            edges_copy = sorted(edges_copy, key=lambda x: x[2]['weight'])
            num_edges_mst += 1
            # Yield the accepted edge with its attribute dict from the
            # initially sorted list, not the re-scored one.
            yield (u, v, edges[edges_no_weights.index((u, v))][2])
            # A spanning tree on n nodes has n - 1 edges: stop there.
            if num_edges_mst == len(G.nodes) - 1:
                break
示例#7
0
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False, data=True):
    """Iterates over the edges of a minimum (or, if `minimum` is false,
    maximum) spanning tree as computed by Borůvka's algorithm.

    `G` is a NetworkX graph. Also, the edges must have distinct weights,
    otherwise the edges may not form a tree.

    `weight` is the edge attribute that stores the edge weights. Each
    edge in the graph must have such an attribute, otherwise a
    :exc:`KeyError` will be raised.

    If `data` is True, this iterator yields edges of the form
    ``(u, v, d)``, where ``u`` and ``v`` are nodes and ``d`` is the edge
    attribute dictionary. Otherwise, it yields edges of the form
    ``(u, v)``.

    The `keys` argument is ignored, since this function is not
    implemented for multigraphs; it exists only for consistency with the
    other minimum spanning tree functions.

    """
    opt = min if minimum else max
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        # Passing ``default`` handles the empty boundary directly, so the
        # boundary iterator does not have to be copied into a list first.
        return opt(
            nx.edge_boundary(G, component, data=True),
            key=lambda e: e[-1][weight],
            default=None,
        )

    def round_candidates():
        """Returns the list of best boundary edges, one per component
        that still has a non-empty boundary.

        """
        # This must be a list, not an iterator: the forest is modified
        # while the result is consumed.
        candidates = (best_edge(part) for part in forest.to_sets())
        return [edge for edge in candidates if edge is not None]

    # Each pass merges components along their best boundary edges. When
    # no component has a boundary edge left, the graph is spanned (or it
    # is disconnected) and the forest is complete.
    #
    # TODO The candidate search can be parallelized, both over the
    # components and inside the computation of each optimum.
    best_edges = round_candidates()
    while best_edges:
        # The same edge may appear twice, in different orientations (but
        # that's okay, since a union operation will be called on the
        # endpoints the first time it is seen, but not the second time).
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
        best_edges = round_candidates()
示例#8
0
def boruvka_mst_edges(G, minimum=True, weight='weight',
                      keys=False, data=True, ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        An edge without this attribute is given weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        The spanning tree (forest) edges, shaped according to `data`.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is False.

    """
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)
    # Comparing negated weights turns "maximum" into "minimum".
    sign = 1 if minimum else -1

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        chosen = None
        chosen_wt = None
        for e in nx.edge_boundary(G, component, data=True):
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (e,))
            # The first usable edge is always a candidate. Starting from
            # +inf with a strict comparison would skip edges whose signed
            # weight is infinite and report the component as complete.
            if chosen is None or wt < chosen_wt:
                chosen, chosen_wt = e, wt
        return chosen

    def round_candidates():
        """Returns the list of best boundary edges, one per component
        that still has a non-empty boundary.

        """
        # This must be a list, not an iterator: the forest is modified
        # while the result is consumed.
        candidates = (best_edge(part) for part in forest.to_sets())
        return [edge for edge in candidates if edge is not None]

    # Each pass merges components along their best boundary edges. When
    # no component has a boundary edge left, the graph is spanned (or it
    # is disconnected) and the forest is complete.
    #
    # TODO The candidate search can be parallelized, both over the
    # components and inside the computation of each optimum.
    best_edges = round_candidates()
    while best_edges:
        # The same edge may appear twice, in different orientations (but
        # that's okay, since a union operation will be called on the
        # endpoints the first time it is seen, but not the second time).
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
        best_edges = round_candidates()
import networkx as nx
from networkx.utils import UnionFind

# Union-find
a, b = 1, 2
uf = UnionFind()
uf.union(a, b)  # merge the sets containing a and b
print(uf[a] == uf[b])  # same-set test (uf[a] returns the root of a's set)
for group in uf.to_sets():  # iterate over all disjoint sets
    pass
# Size of the set that a belongs to. The size is only kept up to date on
# the root entry, so look it up through uf[a] rather than a itself.
ap = uf.weights[uf[a]]

#https://qiita.com/kzm4269/items/081ff2fdb8a6b0a6112f
#https://docs.pyq.jp/python/math_opt/graph.html

# Maximum flow / minimum cut
g = nx.DiGraph()
g.add_edges_from([(0, 3, {'capacity': 10}),
                  (1, 2, {'capacity': 15})])
g.add_edge(1, 3, capacity=20)
nx.maximum_flow(g, 1, 3)
#(20, {0: {3: 0}, 3: {0: 0, 1: 0}, 1: {2: 0, 3: 20}, 2: {1: 0}})
nx.minimum_cut(g, 1, 3)
#(20, ({1, 2}, {0, 3}))

# Minimum-cost flow: a negative demand marks a source, a positive one a sink.
G = nx.DiGraph()
G.add_node("a", demand=-5)
G.add_node("d", demand=5)
G.add_edge("a", "b", weight=3, capacity=4)
G.add_edge("a", "c", weight=6, capacity=10)
G.add_edge("b", "d", weight=1, capacity=9)
示例#10
0
# Group positions by value: n_a[value] collects every index of theArray
# holding that value (n_a is presumably a defaultdict(list) -- confirm).
for position, value in enumerate(theArray):
    n_a[value].append(position)


# Union-find over the node indices 0..node-1.
Structure = UnionFind(range(node))

# XOR masks per Hamming distance: 0 (identical values), 1 (one of the
# n_bits positions flipped) and 2 (two distinct positions flipped).
bit_mask0 = [0]
bit_mask1 = [1 << shift for shift in range(n_bits)]
bit_mask2 = [
    xor(1 << low, 1 << high) for low, high in combinations(range(n_bits), 2)
]

bit_mask = [*bit_mask0, *bit_mask1, *bit_mask2]


for distance in bit_mask:
    for key in n_a:
        p2 = xor(key, distance)
        if p2 == key:
            # Only reached for the zero mask: merge all nodes sharing a value.
            Structure.union(*n_a[key])
        elif p2 in n_a:
            # A value within distance 1 or 2 exists: merge both groups.
            Structure.union(*n_a[key], *n_a[p2])


# Number of clusters that remain.
print(sum(1 for _ in Structure.to_sets()))