Example #1
def asyn_lpa_communities(G, weight=None, seed=None):

    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue

            # Get label frequencies. Depending on the order in which they are
            # processed, some nodes will be at iteration t and others at t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update({labels[v]: G.edges[node, v][weight]
                                   if weight else 1})
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [label for label, freq in label_freq.items()
                           if freq == max_freq]

            # Continue until all nodes have a majority label
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True

    yield from groups(labels).values()
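
A minimal usage sketch for the snippet above. Hedged: in the NetworkX source this function is wrapped by the @py_random_state decorator, so the library call below (NetworkX >= 3) accepts a plain integer seed; calling the bare snippet directly would instead require a random.Random-like object for `seed`.

import networkx as nx

G = nx.karate_club_graph()
communities = list(nx.community.asyn_lpa_communities(G, seed=42))
print([sorted(c) for c in communities])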
Example #2
    def to_sets(self):
        """Iterates over the sets stored in this structure.

        For example::

            >>> partition = UnionFind('xyz')
            >>> sorted(map(sorted, partition.to_sets()))
            [['x'], ['y'], ['z']]
            >>> partition.union('x', 'y')
            >>> sorted(map(sorted, partition.to_sets()))
            [['x', 'y'], ['z']]

        """
        # TODO In Python 3.3+, this should be `yield from ...`.
        for block in groups(self.parents).values():
            yield block
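
The to_sets examples rely on the groups helper from networkx.utils. A minimal sketch of what it does, assuming only the behaviour shown in the doctests: it inverts a many-to-one mapping into a one-to-many dict of sets.

from collections import defaultdict

def groups(many_to_one):
    # Invert {element: group_id} into {group_id: set_of_elements}.
    one_to_many = defaultdict(set)
    for element, group_id in many_to_one.items():
        one_to_many[group_id].add(element)
    return dict(one_to_many)

# groups({'a': 0, 'b': 0, 'c': 1}) == {0: {'a', 'b'}, 1: {'c'}}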
Example #3
    def to_sets(self):
        """Iterates over the sets stored in this structure.

        For example::

            >>> partition = UnionFind("xyz")
            >>> sorted(map(sorted, partition.to_sets()))
            [['x'], ['y'], ['z']]
            >>> partition.union("x", "y")
            >>> sorted(map(sorted, partition.to_sets()))
            [['x', 'y'], ['z']]

        """
        # Ensure fully pruned paths
        for x in self.parents.keys():
            _ = self[x]  # Evaluated for side-effect only

        yield from groups(self.parents).values()
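
The `self[x]` lookup above goes through UnionFind.__getitem__, which finds the root of x's tree and compresses the path along the way; that is what leaves self.parents "fully pruned". An abridged sketch following the standard NetworkX/ActiveState union-find recipe:

class UnionFind:
    # Abridged: only the state and the find-with-path-compression step.
    def __init__(self, elements=()):
        self.parents = {x: x for x in elements}
        self.weights = {x: 1 for x in elements}

    def __getitem__(self, obj):
        # New elements become their own singleton root.
        if obj not in self.parents:
            self.parents[obj] = obj
            self.weights[obj] = 1
            return obj
        # Walk up the tree containing obj to find its root.
        path = [obj]
        root = self.parents[obj]
        while root != path[-1]:
            path.append(root)
            root = self.parents[root]
        # Path compression: point every visited node straight at the root.
        for ancestor in path:
            self.parents[ancestor] = root
        return root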
Example #4
    def to_sets(self):
        """Iterates over the sets stored in this structure.

        For example::

            >>> partition = UnionFind('xyz')
            >>> sorted(map(sorted, partition.to_sets()))
            [['x'], ['y'], ['z']]
            >>> partition.union('x', 'y')
            >>> sorted(map(sorted, partition.to_sets()))
            [['x', 'y'], ['z']]

        """
        # Ensure fully pruned paths
        for x in self.parents.keys():
            _ = self[x]  # Evaluated for side-effect only

        # TODO In Python 3.3+, this should be `yield from ...`.
        for block in groups(self.parents).values():
            yield block
Example #5
def voronoi_cells(G, center_nodes, weight='weight'):
    """Returns the Voronoi cells centered at `center_nodes` with respect
    to the shortest-path distance metric.

    If *C* is a set of nodes in the graph and *c* is an element of *C*,
    the *Voronoi cell* centered at a node *c* is the set of all nodes
    *v* that are closer to *c* than to any other center node in *C* with
    respect to the shortest-path distance metric. [1]_

    For directed graphs, this will compute the "outward" Voronoi cells,
    as defined in [1]_, in which distance is measured from the center
    nodes to the target node. For the "inward" Voronoi cells, use the
    :meth:`DiGraph.reverse` method to reverse the orientation of the
    edges before invoking this function on the directed graph.

    Parameters
    ----------
    G : NetworkX graph

    center_nodes : set
        A nonempty set of nodes in the graph `G` that represent the
        center of the Voronoi cells.

    weight : string or function
        The edge attribute (or an arbitrary function) representing the
        weight of an edge. This keyword argument is as described in the
        documentation for :func:`~networkx.multi_source_dijkstra_path`,
        for example.

    Returns
    -------
    dictionary
        A mapping from center node to set of all nodes in the graph
        closer to that center node than to any other center node. The
        keys of the dictionary are the elements of `center_nodes`, and
        the values of the dictionary form a partition of the nodes of
        `G`.

    Examples
    --------
    To get only the partition of the graph induced by the Voronoi cells,
    take the collection of all values in the returned dictionary::

        >>> G = nx.path_graph(6)
        >>> center_nodes = {0, 3}
        >>> cells = nx.voronoi_cells(G, center_nodes)
        >>> partition = set(map(frozenset, cells.values()))
        >>> sorted(map(sorted, partition))
        [[0, 1], [2, 3, 4, 5]]

    Raises
    ------
    ValueError
        If `center_nodes` is empty.

    References
    ----------
    .. [1] Erwig, Martin. (2000),
           "The graph Voronoi diagram with applications."
           *Networks*, 36: 156--163.
           <dx.doi.org/10.1002/1097-0037(200010)36:3<156::AID-NET2>3.0.CO;2-L>

    """
    # Determine the shortest paths from any one of the center nodes to
    # every node in the graph.
    #
    # This raises `ValueError` if `center_nodes` is an empty set.
    paths = nx.multi_source_dijkstra_path(G, center_nodes, weight=weight)
    # Determine the center node from which the shortest path originates.
    nearest = {v: p[0] for v, p in paths.items()}
    # Get the mapping from center node to all nodes closer to it than to
    # any other center node.
    cells = groups(nearest)
    # We collect all unreachable nodes under a special key, if there are any.
    unreachable = set(G) - set(nearest)
    if unreachable:
        cells['unreachable'] = unreachable
    return cells
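
A small example of the 'unreachable' key: in a directed graph where some node cannot be reached from any center, that node is grouped under the string key 'unreachable' rather than under a center node:

    >>> G = nx.DiGraph([(0, 1)])
    >>> G.add_node(2)
    >>> nx.voronoi_cells(G, {0})
    {0: {0, 1}, 'unreachable': {2}}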
Example #6
def asyn_fluidc(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization is
    random, so the communities found may vary between executions.

    The algorithm proceeds as follows. First each of the initial k communities
    is initialized in a random vertex in the graph. Then the algorithm iterates
    over all vertices in a random order, updating the community of each vertex
    based on its own community and the communities of its neighbours. This
    process is performed several times until convergence.
    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes
    community, the vertex densities of the affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such that
    no vertex changes the community it belongs to, the algorithm has converged
    and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of iterations allowed. By default 100.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    The `k` argument is required; it has no default value.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if len(com_counter.keys()) > 0:
                max_freq = max(com_counter.values())
                best_communities = [
                    com for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If the vertex's current community is among the best, it is kept
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from the candidates
                    new_com = seed.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = max_density / \
                            com_to_numvertices[communities[vertex]]
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = max_density / \
                        com_to_numvertices[communities[vertex]]
        # If maximum iterations reached --> output current results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
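
Hedged usage sketch: in NetworkX the function is wrapped by @py_random_state, so an integer seed works at the library call site (NetworkX >= 3 shown).

import networkx as nx

G = nx.karate_club_graph()
communities = list(nx.community.asyn_fluidc(G, k=2, seed=7))
print([sorted(c) for c in communities])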
Example #7
def asyn_fluidc(G, k, max_iter=100, seed=None):
    # noqa, pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """This function is adapted from networks directly."""
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if com_counter.keys():
                max_freq = max(com_counter.values())
                best_communities = [
                    com for com, freq in com_counter.items()
                    if (max_freq - freq) < 0.0001
                ]
                # If the vertex's current community is among the best, it is kept
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from the candidates
                    new_com = seed.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = (
                            max_density /
                            com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output current results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return list(iter(groups(communities).values())), list(density.values())
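
Unlike the NetworkX original, this adaptation returns a (communities, densities) tuple and is not wrapped by @py_random_state, so a hedged call passes a random.Random instance explicitly:

import random
import networkx as nx

G = nx.karate_club_graph()
communities, densities = asyn_fluidc(G, k=3, seed=random.Random(0))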
Example #8
def asyn_lpa_communities(G, weight=None, seed=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """

    labels = {n: i for i, n in enumerate(G)}
    cont = True

    while cont:
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)

        for node in nodes:

            if not G[node]:
                continue

            # Get label frequencies among adjacent nodes.
            # Depending on the order they are processed in,
            # some nodes will be in iteration t and others in t-1,
            # making the algorithm asynchronous.
            if weight is None:
                # initialising a Counter from an iterator of labels is
                # faster for getting unweighted label frequencies
                label_freq = Counter(map(labels.get, G[node]))
            else:
                # updating a defaultdict is substantially faster
                # for getting weighted label frequencies
                label_freq = defaultdict(float)
                for _, v, wt in G.edges(node, data=weight, default=1):
                    label_freq[labels[v]] += wt

            # Get the labels that appear with maximum frequency.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]

            # If the node does not have one of the maximum frequency labels,
            # randomly choose one of them and update the node's label.
            # Continue the iteration as long as at least one node
            # doesn't have a maximum frequency label.
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True

    yield from groups(labels).values()
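
A tiny demonstration of the weighted-frequency trick used above: G.edges(node, data=weight, default=1) yields (u, v, wt) triples and falls back to 1 when the attribute is missing. The attribute name 'w' below is purely illustrative.

from collections import defaultdict
import networkx as nx

G = nx.Graph()
G.add_edge('a', 'b', w=3)
G.add_edge('a', 'c')  # no 'w' attribute, so default=1 applies
freq = defaultdict(float)
for _, v, wt in G.edges('a', data='w', default=1):
    freq[v] += wt
# freq == {'b': 3.0, 'c': 1.0}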
Example #9
def test_groups():
    many_to_one = dict(zip('abcde', [0, 0, 1, 1, 2]))
    actual = groups(many_to_one)
    expected = {0: {'a', 'b'}, 1: {'c', 'd'}, 2: {'e'}}
    assert actual == expected
    assert {} == groups({})
Example #10
def asyn_lpa_communities(G, weight=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """

    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        random.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue

            # Get label frequencies. Depending on the order in which they are
            # processed, some nodes will be at iteration t and others at t-1,
            # making the algorithm asynchronous.
            # NetworkX 1.x adjacency API: G.edge[v][node]; in 2.x+ this
            # would be G.edges[v, node].
            label_freq = Counter({labels[v]: G.edge[v][node][weight]
                                  if weight else 1 for v in G[node]})

            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [label for label, freq in label_freq.items()
                           if freq == max_freq]
            new_label = random.choice(best_labels)
            labels[node] = new_label
            # Continue until all nodes have a label that is better than other
            # neighbour labels (only one label has max_freq for each node).
            cont = cont or len(best_labels) > 1

    # TODO In Python 3.3 or later, this should be `yield from ...`.
    return iter(groups(labels).values())
Example #11
def asyn_lpa_communities(G, weight=None, seed=None, max_iter=float("inf")):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    max_iter : integer or float('inf') (default)
        Maximum number of iterations to run before the algorithm exits.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """
    labels = {n: i for i, n in enumerate(G)}
    cont = True
    c = 0
    while cont and c < max_iter:
        c += 1
        cont = False
        nodes = list(G)
        seed.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue

            # Get label frequencies. Depending on the order in which they are
            # processed, some nodes will be at iteration t and others at t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[node, v][weight] if weight else 1}
                )
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]

            # Continue until all nodes have a majority label
            if labels[node] not in best_labels:
                labels[node] = seed.choice(best_labels)
                cont = True

    yield from groups(labels).values()
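
Hedged usage of the max_iter variant. This snippet is not wrapped by @py_random_state, so `seed` is passed as a random.Random instance; les_miserables_graph is used here only because it ships with numeric 'weight' attributes.

import random
import networkx as nx

G = nx.les_miserables_graph()
comms = list(asyn_lpa_communities(G, weight="weight",
                                  seed=random.Random(1), max_iter=10))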
Example #12
def asyn_fluidcWeight(G, k, max_iter=100, seed=None):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization is
    random, so the communities found may vary between executions.

    The algorithm proceeds as follows. First each of the initial k communities
    is initialized in a random vertex in the graph. Then the algorithm iterates
    over all vertices in a random order, updating the community of each vertex
    based on its own community and the communities of its neighbours. This
    process is performed several times until convergence.
    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes
    community, the vertex densities of the affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such that
    no vertex changes the community it belongs to, the algorithm has converged
    and returns.

    This is a modified version of the algorithm described in [1]_.
    This version uses the aggregate density multiplied by the edge weights
    to determine community membership.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The number of maximum iterations allowed. By default 100.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    k variable is not an optional argument.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities require connected Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    seed.shuffle(vertices)
    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        G.nodes[vertex]["density"] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        seed.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            weight_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update(
                    {communities[vertex]: density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update(
                        {communities[v]: density[communities[v]]})
                    weight_counter.update(
                        {communities[v]: G.edges[v, vertex]['weight']})
                except KeyError:
                    continue

            # Combine each community's density with its accumulated edge
            # weight; the loop variables are renamed so they do not shadow
            # the parameter k.
            combined = {}
            for com, dens in com_counter.items():
                if weight_counter.get(com) is not None:
                    combined[com] = dens * weight_counter[com]

            # Check which community has the highest weighted density. Skip
            # the update when no neighbour contributed a weighted score; the
            # original computed best_communities even when `combined` was
            # empty, leaving max_combined undefined.
            new_com = -1
            if combined:
                max_combined = max(combined.values())
                best_communities = [
                    com for com in combined
                    if combined[com] == max_combined
                ]
                # If the vertex's current community is among the best, it is kept
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass

                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from the candidates
                    if best_communities:
                        new_com = seed.choice(best_communities)

                    # Update previous community status
                    try:
                        if com_to_numvertices[communities[vertex]] > 1:
                            com_to_numvertices[communities[vertex]] -= 1
                            density[communities[vertex]] = (
                                max_density /
                                com_to_numvertices[communities[vertex]])
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = (
                        max_density / com_to_numvertices[communities[vertex]])
        # If maximum iterations reached --> output current results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
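
Hedged usage sketch for the weighted variant: every edge needs a numeric 'weight' attribute, the graph must be connected, and `seed` must be random.Random-like since there is no @py_random_state decorator here. Note the function also writes a 'density' attribute onto the seed nodes of G.

import random
import networkx as nx

G = nx.les_miserables_graph()  # connected, with 'weight' on every edge
communities = list(asyn_fluidcWeight(G, k=5, seed=random.Random(3)))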


def asyn_fluidc(G, k, max_iter=100, enable_pr=True):
    """Returns communities in `G` as detected by Fluid Communities algorithm.

    The asynchronous fluid communities algorithm is described in
    [1]_. The algorithm is based on the simple idea of fluids interacting
    in an environment, expanding and pushing each other. Its initialization is
    random, so the communities found may vary between executions.

    The algorithm proceeds as follows. First each of the initial k communities
    is initialized in a random vertex in the graph. Then the algorithm iterates
    over all vertices in a random order, updating the community of each vertex
    based on its own community and the communities of its neighbours. This
    process is performed several times until convergence.
    At all times, each community has a total density of 1, which is equally
    distributed among the vertices it contains. If a vertex changes
    community, the vertex densities of the affected communities are adjusted
    immediately. When a complete iteration over all vertices is done, such that
    no vertex changes the community it belongs to, the algorithm has converged
    and returns.

    This is the original version of the algorithm described in [1]_.
    Unfortunately, it does not support weighted graphs yet.

    Parameters
    ----------
    G : Graph

    k : integer
        The number of communities to be found.

    max_iter : integer
        The maximum number of iterations allowed. By default 100.

    enable_pr : bool
        If True, run PageRank to choose the initial seed vertices.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    The `k` argument is required; it has no default value.

    References
    ----------
    .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
       Competitive and Highly Scalable Community Detection Algorithm".
       [https://arxiv.org/pdf/1703.09307.pdf].
    """
    # Initial checks
    if not isinstance(k, int):
        raise NetworkXError("k must be an integer.")
    if not k > 0:
        raise NetworkXError("k must be greater than 0.")
    if not is_connected(G):
        raise NetworkXError("Fluid Communities can only be run on connected "
                            "Graphs.")
    if len(G) < k:
        raise NetworkXError("k cannot be bigger than the number of nodes.")
    # Initialization
    max_density = 1.0
    vertices = list(G)
    random.shuffle(vertices)
    if enable_pr:
        # Run PageRank, then push the top-ranked vertices to the head of the
        # ordering so that they are used as the starting points.
        maybe_print("PageRanks: {0}".format(pagerank(G)), 2, u'i')
        # Rank all vertices by PageRank score, highest first.
        top_keys = [word_id for word_id, _ in
                    sorted(pagerank(G).items(), key=lambda x: x[1],
                           reverse=True)]
        # Shuffle the top 2*k candidates in place (shuffling a slice copy,
        # as the original did, has no effect), then keep k of them as seeds.
        head = top_keys[:k * 2]
        random.shuffle(head)
        top_keys[:k * 2] = head
        top_keys = top_keys[:k]
        maybe_print("Top keys: {0}".format(top_keys), 2, u'i')
        # Append the remaining vertices after the k seeds and adopt this
        # ordering (the original built top_keys but never assigned it back,
        # so the PageRank seeding silently had no effect).
        top_keys.extend([v for v in vertices if v not in top_keys])
        vertices = top_keys

    communities = {n: i for i, n in enumerate(vertices[:k])}
    density = {}
    com_to_numvertices = {}
    for vertex in communities.keys():
        com_to_numvertices[communities[vertex]] = 1
        density[communities[vertex]] = max_density
    # Set up control variables and start iterating
    iter_count = 0
    cont = True
    while cont:
        cont = False
        iter_count += 1
        # Loop over all vertices in graph in a random order
        vertices = list(G)
        random.shuffle(vertices)
        for vertex in vertices:
            # Updating rule
            com_counter = Counter()
            # Take into account self vertex community
            try:
                com_counter.update({communities[vertex]:
                                    density[communities[vertex]]})
            except KeyError:
                pass
            # Gather neighbour vertex communities
            for v in G[vertex]:
                try:
                    com_counter.update({communities[v]:
                                        density[communities[v]]})
                except KeyError:
                    continue
            # Check which is the community with highest density
            new_com = -1
            if len(com_counter.keys()) > 0:
                max_freq = max(com_counter.values())
                best_communities = [com for com, freq in com_counter.items()
                                    if (max_freq - freq) < 0.0001]
                # If the vertex's current community is among the best, it is kept
                try:
                    if communities[vertex] in best_communities:
                        new_com = communities[vertex]
                except KeyError:
                    pass
                # If vertex community changes...
                if new_com == -1:
                    # Set flag of non-convergence
                    cont = True
                    # Randomly choose a new community from the candidates
                    new_com = random.choice(best_communities)
                    # Update previous community status
                    try:
                        com_to_numvertices[communities[vertex]] -= 1
                        density[communities[vertex]] = max_density / \
                            com_to_numvertices[communities[vertex]]
                    except KeyError:
                        pass
                    # Update new community status
                    communities[vertex] = new_com
                    com_to_numvertices[communities[vertex]] += 1
                    density[communities[vertex]] = max_density / \
                        com_to_numvertices[communities[vertex]]
        # If maximum iterations reached --> output current results
        if iter_count > max_iter:
            break
    # Return results by grouping communities as list of vertices
    return iter(groups(communities).values())
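
Hedged usage for the PageRank-seeded variant; it assumes the pagerank and maybe_print helpers referenced in the body are importable in the surrounding module.

import networkx as nx

G = nx.karate_club_graph()
comms_pr = list(asyn_fluidc(G, k=2, enable_pr=True))     # PageRank-seeded starts
comms_rand = list(asyn_fluidc(G, k=2, enable_pr=False))  # purely random starts
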
def asyn_lpa_communities(G, weight=None):
    """Returns communities in `G` as detected by asynchronous label
    propagation.

    The asynchronous label propagation algorithm is described in
    [1]_. The algorithm is probabilistic and the found communities may
    vary on different executions.

    The algorithm proceeds as follows. After initializing each node with
    a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
    neighbors. The algorithm halts when each node has the label that
    appears most frequently among its neighbors. The algorithm is
    asynchronous because each node is updated without waiting for
    updates on the remaining nodes.

    This generalized version of the algorithm in [1]_ accepts edge
    weights.

    Parameters
    ----------
    G : Graph

    weight : string
        The edge attribute representing the weight of an edge.
        If None, each edge is assumed to have weight one. In this
        algorithm, the weight of an edge is used in determining the
        frequency with which a label appears among the neighbors of a
        node: a higher weight means the label appears more often.

    Returns
    -------
    communities : iterable
        Iterable of communities given as sets of nodes.

    Notes
    -----
    Edge weight attributes must be numerical.

    References
    ----------
    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
           linear time algorithm to detect community structures in large-scale
           networks." Physical Review E 76.3 (2007): 036106.
    """

    labels = {n: i for i, n in enumerate(G)}
    cont = True
    while cont:
        cont = False
        nodes = list(G)
        random.shuffle(nodes)
        # Calculate the label for each node
        for node in nodes:
            if len(G[node]) < 1:
                continue

            # Get label frequencies. Depending on the order in which they are
            # processed, some nodes will be at iteration t and others at t-1,
            # making the algorithm asynchronous.
            label_freq = Counter()
            for v in G[node]:
                label_freq.update(
                    {labels[v]: G.edges[v, node][weight] if weight else 1})
            # Choose the label with the highest frequency. If more than one
            # label has the highest frequency, choose one at random.
            max_freq = max(label_freq.values())
            best_labels = [
                label for label, freq in label_freq.items() if freq == max_freq
            ]
            new_label = random.choice(best_labels)
            labels[node] = new_label
            # Continue until all nodes have a label that is better than other
            # neighbour labels (only one label has max_freq for each node).
            cont = cont or len(best_labels) > 1

    # TODO In Python 3.3 or later, this should be `yield from ...`.
    return iter(groups(labels).values())