Пример #1
0
 def test_angel(self):
     """Run ANGEL (threshold 0.25) on the graph from ``get_string_graph()``.

     The detected communities must be a ``list``; when at least one community
     is found, it must itself be a ``list`` whose first member is a ``str``.
     """
     graph = get_string_graph()
     clustering = algorithms.angel(graph, threshold=0.25)
     found = clustering.communities
     self.assertEqual(type(found), list)
     if len(found) == 0:
         # Nothing was detected, so there is no community to inspect.
         return
     first = found[0]
     self.assertEqual(type(first), list)
     self.assertEqual(type(first[0]), str)
Пример #2
0
    def test_community_graph(self):
        """Plot the community graph of the karate club network twice.

        First for a Louvain result with default plot options, then for an
        ANGEL result (threshold 0.25) with overlaps and labels enabled. After
        each plot the current figure is saved to ``cg.pdf`` and the file is
        removed again.
        """
        graph = nx.karate_club_graph()
        pdf_name = "cg.pdf"

        louvain_result = algorithms.louvain(graph)
        viz.plot_community_graph(graph, louvain_result)
        plt.savefig(pdf_name)
        os.remove(pdf_name)

        angel_result = algorithms.angel(graph, 0.25)
        viz.plot_community_graph(
            graph, angel_result, plot_overlaps=True, plot_labels=True
        )
        plt.savefig(pdf_name)
        os.remove(pdf_name)
Пример #3
0
def community_discoverying_algorithms(g):
    """
    Run four community discovery algorithms on ``g`` and export/plot their results.

    ANGEL (threshold 0.25), Infomap, Louvain and label propagation are each run on
    ``g.to_undirected()``. Every result is passed to ``write_on_file`` together with a JSON
    path and to ``draw_community_graph`` (with the original ``g``) together with a PNG path.
    Afterwards the four results are passed together to ``draw_cluster_violin_map`` and
    ``draw_cluster_heatmap``, and every pair of results is passed to ``draw_plot_map``
    with a running index from 1 to 6.
    """
    # (start message, detector, JSON path, PNG path). Detectors are wrapped in
    # lambdas so each lookup and graph conversion happens only when its turn comes.
    pipeline = (
        ("Starting computing angel_coms",
         lambda: algorithms.angel(g.to_undirected(), threshold=0.25),
         "communities/angel.json",
         "communities/angel.png"),
        ("Starting computing infomap_coms",
         lambda: algorithms.infomap(g.to_undirected()),
         "communities/infomap.json",
         "communities/infomap.png"),
        ("Starting computing louvain_coms",
         lambda: algorithms.louvain(g.to_undirected()),
         "communities/louvain.json",
         "communities/louvain.png"),
        ("Starting computing labelpropagation_coms",
         lambda: algorithms.label_propagation(g.to_undirected()),
         "communities/labelpropagation.json",
         "communities/labelpropagation.png"),
    )

    results = []
    for banner, detect, json_path, png_path in pipeline:
        print(banner)
        clustering = detect()
        write_on_file(clustering, json_path)
        draw_community_graph(g, clustering, png_path)
        print("END")
        results.append(clustering)

    # Each drawing helper receives its own fresh list of the four results.
    draw_cluster_violin_map(list(results))
    draw_cluster_heatmap(list(results))

    # Pairwise comparison plots, numbered in order:
    # (angel, infomap)=1, (angel, louvain)=2, (angel, labelprop)=3,
    # (infomap, louvain)=4, (infomap, labelprop)=5, (louvain, labelprop)=6.
    pair_index = 0
    for position, first in enumerate(results):
        for second in results[position + 1:]:
            pair_index += 1
            draw_plot_map([first, second], pair_index)
Пример #4
0
    def test_nx_cluster(self):
        """Plot network clusters of the karate club graph twice.

        First for a Louvain result with default plot options, then for an
        ANGEL result (threshold 0.25) with labels and overlaps enabled. A new
        spring layout is computed before each plot; after each plot the
        current figure is saved to ``cluster.pdf`` and the file is removed.
        """
        graph = nx.karate_club_graph()
        pdf_name = "cluster.pdf"

        louvain_result = algorithms.louvain(graph)
        layout = nx.spring_layout(graph)
        viz.plot_network_clusters(graph, louvain_result, layout)
        plt.savefig(pdf_name)
        os.remove(pdf_name)

        angel_result = algorithms.angel(graph, 0.25)
        layout = nx.spring_layout(graph)
        viz.plot_network_clusters(
            graph, angel_result, layout, plot_labels=True, plot_overlaps=True
        )
        plt.savefig(pdf_name)
        os.remove(pdf_name)
    def test_ranking(self):
        """Rank four clusterings of the karate club graph with ``FitnessRanking``.

        Louvain, DEMON (0.25), label propagation and ANGEL (0.6) results are
        ranked under six fitness functions. The TOPSIS ranking must have four
        entries and the Bonferroni post-hoc result at most four.
        """
        graph = nx.karate_club_graph()
        louvain_result = algorithms.louvain(graph)
        demon_result = algorithms.demon(graph, 0.25)
        label_prop_result = algorithms.label_propagation(graph)
        angel_result = algorithms.angel(graph, 0.6)

        ranking = evaluation.FitnessRanking(
            graph,
            [demon_result, louvain_result, label_prop_result, angel_result],
        )

        scoring_functions = (
            evaluation.fraction_over_median_degree,
            evaluation.edges_inside,
            evaluation.cut_ratio,
            evaluation.erdos_renyi_modularity,
            evaluation.newman_girvan_modularity,
            evaluation.modularity_density,
        )
        for scoring in scoring_functions:
            ranking.rank(scoring)

        ordering, _ = ranking.topsis()
        self.assertEqual(len(ordering), 4)

        post_hoc = ranking.bonferroni_post_hoc()
        self.assertLessEqual(len(post_hoc), 4)
Пример #6
0
def find_communities(nnodes, edges, alg, params=None):
    """Detect communities in a graph with the algorithm named by ``alg``.

    The graph is built from ``edges`` with whichever library the selected
    branch uses (networkx, igraph or graph-tool). ``nnodes`` is used to create
    the igraph vertices and to enumerate node ids in the ``sbm_nested`` branch.

    Args:
        nnodes: Number of nodes; the igraph and ``sbm_nested`` branches use
            node ids ``0 .. nnodes - 1``.
        edges: Collection of node-id pairs.
        alg: Name of the algorithm to run (see the branches below).
        params: Mapping of algorithm parameters. Values are converted with
            ``int``/``float`` as each algorithm requires. The mapping is
            copied first, so the caller's object is never modified.

    Returns:
        A list of communities, an empty list when the ``edge_betweenness`` or
        ``walktrap`` dendrogram cannot be converted into a clustering, or
        ``None`` when ``alg`` is not a recognised name.
    """
    # Work on a private copy: several branches overwrite entries in place.
    params = dict(params) if params else {}

    def membership2cs(membership):
        """Group positions of ``membership`` by their community label."""
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: "nx.Graph"):
        """Yield each connected component relabelled to 0..k-1.

        The original node label is kept in the ``'old'`` node attribute.
        """
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        """Run ``algorithm`` per connected component and merge the results.

        Components with at most three nodes become a single community.
        Returned communities use the original node labels.
        """
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities

            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        """Fit a karateclub-style model and convert its memberships."""
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))
    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf', 'egonet_splitter',
                 'lfm', 'multicom', 'nmnf', 'nnsed', 'node_perception', 'slpa',
                 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            # Keep only the last of the first k partitions.
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    # Let it be a cluster. The component was relabelled, so
                    # map back to the original ids as apply_subgraphs does.
                    cs.append([sub.nodes[i]['old'] for i in sub.nodes])
                else:
                    coms = cdlib.congo(sub,
                                       number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # NOTE: every parameter is cast to int here, so a fractional
            # value (e.g. a float `gamma`) cannot be passed through.
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)

    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg in ('leig', 'community_leading_eigenvector'):
            # Both names are accepted by the dispatch above; treat the long
            # one as an alias so it no longer falls through without a result.
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            # The Graph-level method returns a clustering object, like the
            # other community_* calls in this section.
            vcl = G.community_optimal_modularity()
            cs = membership2cs(vcl.membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters, directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort: no usable cut of the dendrogram -> no result.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(parupdate=True,
                                        update_rule=params['update_rule'],
                                        start_temp=float(params['start_temp']),
                                        stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort: no usable cut of the dendrogram -> no result.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)

    elif alg in ("sbm", "sbm_nested"):
        # Fixed seeds for both RNGs so repeated runs give the same blocks.
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        # NOTE(review): bool() of any non-empty string is True, so a textual
        # 'False' would enable deg_corr. Confirm callers pass a real boolean.
        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G,
                                              deg_corr=deg_corr,
                                              B_min=B_min,
                                              B_max=B_max)

            membership = state.get_blocks()
            cs = membership2cs(membership)
        elif alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G,
                                                     deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])

            membership = {}
            for nid in range(nnodes):
                # Follow the node's block through the hierarchy levels. The
                # counter runs down from len(levels); the node is recorded
                # under its block at the step where it equals level_max.
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1

            cs = membership.values()

    else:
        return None

    return list(cs)
Пример #7
0
 def angel(threshold, min_com_size):
     """Return a one-argument callable that runs ``algorithms.angel`` on a graph.

     ``threshold`` and ``min_com_size`` are captured here and passed
     positionally, after the graph, on every call of the returned function.
     """
     def run_on(G):
         return algorithms.angel(G, threshold, min_com_size)

     return run_on
 # Register the ANGEL runner (threshold=0.25, min_com_size=20) under the 'angel' key.
 algos['angel'] = angel(0.25, 20)