Exemplo n.º 1
0
    def test_nx_integer_mapping(self):
        g = nx.karate_club_graph()
        g, node_map = utils.nx_node_integer_mapping(g)
        self.assertIsNone(node_map)

        g = get_string_graph()
        nodes = list(g.nodes())
        g, node_map = utils.nx_node_integer_mapping(g)
        self.assertListEqual(sorted(nodes), sorted(list(node_map.values())))
Exemplo n.º 2
0
    def test_remap_node_community(self):
        g = get_string_graph()
        nodes = list(g.nodes())
        g, node_map = utils.nx_node_integer_mapping(g)

        coms = algorithms.louvain(g)
        coms_remap = utils.remap_node_communities(coms.communities, node_map)

        flat_list = [item for sublist in coms_remap for item in sublist]
        self.assertListEqual(sorted(nodes), sorted(flat_list))
Exemplo n.º 3
0
def frc_fgsn(g, theta, eps, r):
    """Fuzzy-Rough Community Detection on Fuzzy Granular model of Social Network.

    FRC-FGSN assigns nodes to communities specifying the probability of each association.
    The flattened partition ensure that each node is associated to the community that maximize such association probability.
    FRC-FGSN may generate orphan nodes (i.e., nodes not assigned to any community).

    :param graph: networkx/igraph object
    :param theta: community density coefficient
    :param eps: coupling coefficient of the community. Ranges in [0, 1], small values ensure that only strongly connected node granules are merged togheter.
    :param r: radius of the granule (int)
    :return: FuzzyNodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = frc_fgsn(G, theta=1, eps=0.5, r=3)


    :References:

    Kundu, S., & Pal, S. K. (2015). `Fuzzy-rough community in social networks. <https://www.sciencedirect.com/science/article/pii/S0167865515000537/>`_ Pattern Recognition Letters, 67, 145-152.

    .. note:: Reference implementation: https://github.com/nidhisridhar/Fuzzy-Community-Detection
    """

    graph = convert_graph_formats(g, nx.Graph)
    g, maps = nx_node_integer_mapping(graph)

    communities, fuzz_assoc = fuzzy_comm(graph, theta, eps, r)

    if maps is not None:
        coms = []
        for c in communities:
            coms.append([tuple(maps[n]) for n in c])

        nx.relabel_nodes(g, maps, False)
        fuzz_assoc = {maps[nid]: v for nid, v in fuzz_assoc.items()}
    else:
        coms = [list(c) for c in communities]

    return FuzzyNodeClustering(coms,
                               fuzz_assoc,
                               graph,
                               "FuzzyComm",
                               method_parameters={
                                   "theta": theta,
                                   "eps": eps,
                                   "r": r
                               })
Exemplo n.º 4
0
def markov_clustering(g, max_loop=1000):
    """
    The Markov clustering algorithm (MCL) is based on simulation of (stochastic) flow in graphs.
    The MCL algorithm finds cluster structure in graphs by a mathematical bootstrapping procedure. The process deterministically computes (the probabilities of) random walks through the graph, and uses two operators transforming one set of probabilities into another. It does so using the language of stochastic matrices (also called Markov matrices) which capture the mathematical concept of random walks on a graph.
    The MCL algorithm simulates random walks within a graph by alternation of two operators called expansion and inflation.

    :param g: a networkx/igraph object
    :param max_loop: maximum number of iterations, default 1000
    :return: EdgeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.markov_clustering(G, max_loop=1000)

    :References:

    Enright, Anton J., Stijn Van Dongen, and Christos A. Ouzounis. `An efficient algorithm for large-scale detection of protein families. <https://www.ncbi.nlm.nih.gov/pubmed/11917018/>`_ Nucleic acids research 30.7 (2002): 1575-1584.

    .. note:: Reference implementation: https://github.com/HarshHarwani/markov-clustering-for-graphs
    """

    g = convert_graph_formats(g, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    coms = markov(g, max_loop)

    if maps is not None:
        communities = []
        for c in coms:
            com = []
            for e in c:
                com.append(tuple([maps[n] for n in e]))
            communities.append(com)

        nx.relabel_nodes(g, maps, False)
    else:
        communities = [list(c) for c in coms]

    return EdgeClustering(communities,
                          g,
                          "Markov Clustering",
                          method_parameters={"max_loop": max_loop})
Exemplo n.º 5
0
def multicom(g_original, seed_node):
    """
    MULTICOM is an algorithm for detecting multiple local communities, possibly overlapping, by expanding the initial seed set.
    This algorithm uses local scoring metrics to define an embedding of the graph around the seed set. Based on this embedding, it picks new seeds in the neighborhood of the original seed set, and uses these new seeds to recover multiple communities.

    :param g_original: a networkx/igraph object
    :param seed_node: Id of the seed node around which we want to detect communities.
    :return: EdgeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> coms = algorithms.multicom(G, seed_node=0)

    :References:

    Hollocou, Alexandre, Thomas Bonald, and Marc Lelarge. `Multiple Local Community Detection. <https://hal.archives-ouvertes.fr/hal-01625444/document/>`_ ACM SIGMETRICS Performance Evaluation Review 45.2 (2018): 76-83.

    .. note:: Reference implementation: https://github.com/ahollocou/multicom

    """

    g = convert_graph_formats(g_original, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    mc = MultiCom(g)
    coms = mc.execute(seed_node)

    if maps is not None:
        communities = []
        for c in coms:
            communities.append([maps[n] for n in c])
        nx.relabel_nodes(g, maps, False)
    else:
        communities = [list(c) for c in coms]

    return NodeClustering(communities,
                          g_original,
                          "Multicom",
                          method_parameters={"seeds": seed_node},
                          overlap=True)
Exemplo n.º 6
0
def gdmp2(g, min_threshold=0.75):
    """
    Gdmp2 is a method for identifying a set of dense subgraphs of a given sparse graph.
    It is inspired by an effective technique designed for a similar problem—matrix blocking, from a different discipline (solving linear systems).

    :param g: a networkx/igraph object
    :param min_threshold:  the minimum density threshold parameter to control the density of the output subgraphs, default 0.75
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.gdmp2(G)

    :References:

    Chen, Jie, and Yousef Saad. `Dense subgraph extraction with application to community detection. <https://ieeexplore.ieee.org/document/5677532/>`_ IEEE Transactions on Knowledge and Data Engineering 24.7 (2012): 1216-1230.

    .. note:: Reference implementation: https://github.com/imabhishekl/CSC591_Community_Detection
    """

    g = convert_graph_formats(g, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    coms = GDMP2(g, min_threshold)

    if maps is not None:
        communities = []
        for c in coms:
            communities.append([maps[n] for n in c])
        nx.relabel_nodes(g, maps, False)
    else:
        communities = coms

    return NodeClustering(communities,
                          g,
                          "GDMP2",
                          method_parameters={"min_threshold": min_threshold})
Exemplo n.º 7
0
def em(g, k):
    """
    EM is based on based on a mixture model.
    The algorithm uses the expectation–maximization algorithm to detect structure in networks.

    :param g: a networkx/igraph object
    :param k: the number of desired communities
    :return: NodeClustering object

    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.em(G, k=3)

    :References:

    Newman, Mark EJ, and Elizabeth A. Leicht. `Mixture community and exploratory analysis in networks.  <https://www.pnas.org/content/104/23/9564/>`_  Proceedings of the National Academy of Sciences 104.23 (2007): 9564-9569.
    """

    g = convert_graph_formats(g, nx.Graph)
    g, maps = nx_node_integer_mapping(g)

    algorithm = EM_nx(g, k)
    coms = algorithm.execute()

    if maps is not None:
        communities = []
        for c in coms:
            communities.append([maps[n] for n in c])
        nx.relabel_nodes(g, maps, False)
    else:
        communities = [list(c) for c in coms]

    return NodeClustering(communities, g, "EM", method_parameters={"k": k})
Exemplo n.º 8
0
def overlapping_seed_set_expansion(g, seeds, ninf=False, expansion='ppr', stopping='cond', nworkers=1,
                                   nruns=13, alpha=0.99, maxexpand=float('INF'), delta=0.2):
    """
    OSSE is an overlapping community detection algorithm optimizing the conductance community score
    The algorithm uses a seed set expansion approach; the key idea is to find good seeds, and then expand these seed sets using the personalized PageRank clustering procedure.

    :param g: a networkx/igraph object
    :param seeds: Node list
    :param ninf: Neighbourhood Inflation parameter (boolean)
    :param expansion: Seed expansion: ppr or vppr
    :param stopping: Stopping criteria: cond
    :param nworkers: Number of Workers: default 1
    :param nruns: Number of runs: default 13
    :param alpha: alpha value for Personalized PageRank expansion: default 0.99
    :param maxexpand: Maximum expansion allowed for approximate ppr: default INF
    :param delta: Minimum distance parameter for near duplicate communities: default 0.2
    :return: NodeClustering object


    :Example:

    >>> from cdlib import algorithms
    >>> import networkx as nx
    >>> G = nx.karate_club_graph()
    >>> com = algorithms.overlapping_seed_set_expansion(G)

    :References:

    1.Whang, J. J., Gleich, D. F., & Dhillon, I. S. (2013, October). `Overlapping community detection using seed set expansion. <http://www.cs.utexas.edu/~inderjit/public_papers/overlapping_commumity_cikm13.pdf/>`_ In Proceedings of the 22nd ACM international conference on Conference on information & knowledge management (pp. 2099-2108). ACM.

    .. note:: Reference implementation: https://github.com/pratham16/algorithms-detection-by-seed-expansion
    """

    g = convert_graph_formats(g, nx.Graph)

    g, maps = nx_node_integer_mapping(g)
    if maps is not None:
        rev_map = {v: k for k, v in maps.items()}
        seeds = [rev_map[s] for s in seeds]

    if ninf:
        seeds = OSSE.neighbor_inflation(g, seeds)

    communities = OSSE.growclusters(g, seeds, expansion, stopping, nworkers, nruns, alpha, maxexpand, False)
    communities = OSSE.remove_duplicates(g, communities, delta)
    communities = list(communities)

    if maps is not None:
        coms = []
        for com in communities:
            coms.append([maps[n] for n in com])
        nx.relabel_nodes(g, maps, False)
    else:
        coms = communities

    return NodeClustering(coms, g, "Overlapping SSE", method_parameters={"seeds": seeds, "ninf": ninf,
                                                                         "expansion": expansion,
                                                                         "stopping": stopping,
                                                                         "nworkers": nworkers,
                                                                         "nruns": nruns, "alpha": alpha,
                                                                         "maxexpand": maxexpand,
                                                                         "delta": delta},
                          overlap=True)