Example #1
import time

from networkx.algorithms.community.quality import modularity, performance


def export_log(G, communities, dataset, algorithm, d_threshold, w_threshold,
               path):
    '''Export the community result to a log file for manual analysis.
    '''
    with open(path, 'w') as f:
        # write some key information first
        line = "dataset: " + dataset + "\n"
        line += "algorithm: " + algorithm + "\n"
        line += "d_threshold: " + str(d_threshold) + "\n"
        line += "w_threshold: " + str(w_threshold) + "\n"
        line += "time: " + time.asctime(time.localtime(time.time())) + "\n"
        line += "-------------------------------------\n"
        line += "communities: " + str(len(communities)) + "\n"
        line += "modularity: " + str(round(modularity(G, communities),
                                           3)) + "\n"
        line += "performance: " + str(round(performance(G, communities),
                                            3)) + "\n"
        line += "=====================================\n"
        f.write(line)

        # write community line by line
        for community in communities:
            namelist = [str(v) for v in community]  # str() so join also works for int node ids
            line = ", ".join(namelist)
            f.write(line + '\n')

    print("[Done] export log file:", path)
Example #2
    def test_good_partition(self):
        """Tests that a good partition has a high performance measure.

        """
        G = barbell_graph(3, 0)
        partition = [{0, 1, 2}, {3, 4, 5}]
        assert_almost_equal(14 / 15, performance(G, partition))
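Why 14/15: performance counts correctly classified node pairs (intra-community edges plus inter-community non-edges) over all C(6, 2) = 15 pairs. The same value can be computed by hand:

from itertools import combinations
import networkx as nx

G = nx.barbell_graph(3, 0)
partition = [{0, 1, 2}, {3, 4, 5}]
label = {v: i for i, c in enumerate(partition) for v in c}
# Same-community pairs should be edges and cross-community pairs
# non-edges; here only the bridge edge (2, 3) is misclassified.
correct = sum((label[u] == label[v]) == G.has_edge(u, v)
              for u, v in combinations(G, 2))
print(correct, "/ 15")  # 14 / 15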
Example #4
def best_split(wordPairs):
    """
    Given a list of (word pair, similarity) tuples, build a graph from the
    most similar word pairs and return the best community partition.

    :param wordPairs: list of ((word_a, word_b), similarity) tuples
    :return: (level of the Girvan-Newman hierarchy that gives the best score,
              best harmonic mean of performance and coverage, best partition)
    """
    from networkx.algorithms import community
    from networkx.algorithms.community.quality import performance, coverage
    import networkx as nx

    Graph = nx.Graph()
    edges = [(pair[0][0], pair[0][1]) for pair in wordPairs]
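    # edge widths derived from the similarity scores; computed here but never
    # used in this function (presumably intended for plotting)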
    edgewidth = [pair[1] * 10 for pair in wordPairs]
    Graph.add_edges_from(edges)

    max_pc = 0
    max_index = None
    best_communities = None
    communities_generator = community.girvan_newman(Graph)
    for i, communities in enumerate(communities_generator):
        p = performance(Graph, communities)
        c = coverage(Graph, communities)
        score = 2 * p * c / (p + c)  # harmonic mean of performance and coverage
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
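A minimal usage sketch; the (word pair, similarity) input format below is inferred from how best_split indexes wordPairs, and the toy pairs are placeholders:

# Toy input inferred from the indexing in best_split; real word pairs
# would come from an embedding-similarity step.
word_pairs = [
    (("cat", "dog"), 0.90),
    (("cat", "kitten"), 0.85),
    (("car", "truck"), 0.88),
    (("car", "road"), 0.60),
]
level, score, communities = best_split(word_pairs)
print(level, round(score, 3), communities)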
Example #5
def calc_graph_measures(data_matrix, thresh=0):
    # numpy, pandas and networkx are used below but were not imported
    # in the original snippet
    import networkx
    import numpy as np
    import pandas as pd
    from networkx import eccentricity
    from networkx.algorithms.efficiency import global_efficiency
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
    from networkx.algorithms.centrality import betweenness_centrality
    from networkx.algorithms.cluster import average_clustering
    from networkx.algorithms.community.modularity_max import greedy_modularity_communities
    from networkx.algorithms.community.quality import performance

    def _avg_values(results):
        values = []
        if isinstance(results, dict):
            for k in results:
                values.append(results[k])
        elif isinstance(results, list):
            for tup in results:
                values.append(tup[1])

        return np.mean(values)

    below_thresh_indices = np.abs(data_matrix) < thresh
    data_matrix[below_thresh_indices] = 0
    if isinstance(data_matrix, np.ndarray):
        graph = networkx.convert_matrix.from_numpy_matrix(np.real(data_matrix))
    elif isinstance(data_matrix, pd.DataFrame):
        graph = networkx.convert_matrix.from_pandas_adjacency(data_matrix)
    else:
        raise TypeError("data_matrix must be a numpy array or pandas DataFrame")

    degree = list(graph.degree)
    global_eff = global_efficiency(graph)
    b_central = betweenness_centrality(graph)
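    # NOTE: despite its name, this variable holds the *performance* of the
    # partition found by greedy_modularity_communities, not its modularity.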
    modularity = performance(graph, greedy_modularity_communities(graph))
    try:
        ecc = eccentricity(graph)
    except networkx.exception.NetworkXError:
        ecc = [(0, 0)]

    try:
        clust = average_clustering(graph)
    except networkx.exception.NetworkXError:
        clust = 0

    try:
        char_path = average_shortest_path_length(graph)
    except networkx.exception.NetworkXError:
        char_path = 0

    graph_dict = {'degree': _avg_values(degree),
                  'eccentricity': _avg_values(ecc),
                  'global_efficiency': global_eff,
                  'characteristic_path_length': char_path,
                  'betweenness_centrality': _avg_values(b_central),
                  'clustering_coefficient': clust,
                  'modularity': modularity}

    return graph_dict
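A minimal smoke test, assuming a NetworkX version that still provides from_numpy_matrix (it was removed in NetworkX 3.0):

import numpy as np

rng = np.random.default_rng(0)
m = rng.random((10, 10))
m = (m + m.T) / 2        # symmetric weights so the graph is undirected
np.fill_diagonal(m, 0)   # no self-loops
print(calc_graph_measures(m, thresh=0.5))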
Example #6
    def clustering_statistics(self, community_partition, feat_name, feat_desc,
                              feat_interpret):
        """Compute quality of the community partitions."""
        compl_desc = " of the partition of " + feat_desc

        self.add_feature(
            feat_name + "_modularity",
            lambda graph: quality.modularity(graph, community_partition),
            "Modularity" + compl_desc,
            feat_interpret,
        )

        self.add_feature(
            feat_name + "_coverage",
            lambda graph: quality.coverage(graph, community_partition),
            "Coverage" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_performance",
            lambda graph: quality.performance(graph, community_partition),
            "Performance" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_inter_community_edges",
            lambda graph: quality.inter_community_edges(
                graph, community_partition),
            "Inter community edges" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_inter_community_non_edges",
            lambda graph: quality.inter_community_non_edges(
                graph, community_partition),
            "Inter community non edges" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_intra_community_edges",
            lambda graph: quality.intra_community_edges(
                graph, community_partition),
            "Intra community edges" + compl_desc,
            feat_interpret,
        )
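The add_feature method itself is not shown in this snippet; a minimal hypothetical stand-in, purely to make the example self-contained:

    # Hypothetical stand-in for the add_feature method used above; the real
    # class in the source project may record features differently.
    def add_feature(self, feature_name, feature_function, description,
                    interpretability):
        self.features[feature_name] = {
            "function": feature_function,
            "description": description,
            "interpretability": interpretability,
        }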
Example #7
    def _compute_performance(self, partition, weight=None):
        return performance(self.graph, _get_community_sets(partition))
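_get_community_sets is assumed but not shown; a plausible sketch, assuming partition maps each node to a community label:

# Hypothetical helper assumed by the snippet above: convert a
# {node: community_label} dict into the list of sets expected by
# networkx quality functions such as performance().
def _get_community_sets(partition):
    communities = {}
    for node, label in partition.items():
        communities.setdefault(label, set()).add(node)
    return list(communities.values())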
Example #8
    ##--------------------- print for label propagation result
    G_treated = label_prop(G, max_iter=100)
    labels = [G_treated.nodes[node]["label"] for node in G_treated.nodes]
    # print(labels)
    labels = list(set(labels))
    partitions = []
    for label in labels:
        partitions.append(
            set([
                node for node in G_treated.nodes
                if G_treated.nodes[node]["label"] == label
            ]))
    # start = time.time()
    print('modularity, coverage, performance : ',
          modularity(G_treated, partitions), coverage(G_treated, partitions),
          performance(G_treated, partitions))
    # end = time.time()
    # print(end-start)

    ##--------------------- print for louvain result
    # start = time.time()
    partition = community_louvain.best_partition(G)
    # print(partition)
    labels = [partition[node] for node in G.nodes]
    labels = list(set(labels))
    partitions = []
    for label in labels:
        partitions.append(
            set([node for node in G.nodes if partition[node] == label]))
    # print(modularity(G, partitions))
    # end = time.time()
def main():
    global VERBOSE
    h, v, w, mu, mu_val = parse(' '.join(sys.argv[1:]))
    
    if h:
        print("- Use -v to activate Verbose")
        print("- Use -w to exclude the genetic algorithm from the run")
        print("- Use -mu value to set the value of mu in the graph generators; value should be in the range (0, 1)")
        return

    if v:
        VERBOSE = True
    else:
        VERBOSE = False

    if w:
        algorithms = [clauset_newman_moore, louvain, reneel]
    else:
        algorithms = [clauset_newman_moore, louvain, reneel, gcm]
    

    if VERBOSE:
        print("Start process")

    # small graphs
    non_lfr_runs(algorithms)

    with open('results/small_c1_test.json', 'w') as fs:
        json.dump(RESULTS_S, fs)

    # lfr benchmark graphs
    sizes = [250, 500, 1000, 1500, 2000, 2500, 3000]
    for n in sizes:
        G, target_partition, target_communities = genrate_lfr_graph(size=n, mu=mu_val)
        nodes_no = n
        edges_no = G.number_of_edges()
        avg_degree = sum([G.degree[i] for i in range(n)]) / nodes_no
        print("========================================================")
        print(nodes_no, edges_no, avg_degree)
        print("========================================================")

        pos = nx.spring_layout(G)

        results = [alg(G) for alg in algorithms]
        partitions = [r[0] for r in results]

        metrics = [(coverage(G, r[1]), performance(G, r[1]), 
                    normalized_mutual_info_score(convert_to_array(target_partition), 
                                                 convert_to_array(r[0])
                                                ))
                    for r in results]
        
        runtimes = [r[2] for r in results]
          
        for idx in range(len(metrics)):
            RESULTS_LFR[NAMES[idx]][n] = {
                "coverage": metrics[idx][0],
                "performance": metrics[idx][1],
                "nmi": metrics[idx][2],
                "runtime": runtimes[idx],
            }
            if VERBOSE:
                print(
                    f"The coverage obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][0])
                print(
                    f"The performance obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][1])
                print(
                    f"The NMI score obtained by {algorithms[idx].__name__} was " + "%.4f" % metrics[idx][2])
                print("========================================================")

        parallel_display(algorithms, partitions, G, pos)
    
    with open('results/lfr_c1_test.json', 'w') as fb:
        json.dump(RESULTS_LFR, fb)
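convert_to_array is not defined in the snippet; a plausible sketch, assuming each partition is a {node: community_id} dict, might be:

# Hypothetical helper assumed by main(): flatten a {node: community_id}
# partition into a label list ordered by node id, the input format that
# sklearn's normalized_mutual_info_score expects.
def convert_to_array(partition):
    return [partition[node] for node in sorted(partition)]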
def non_lfr_runs(algorithms):
    # Karate Club graph
    karate_g = generate_karate_club_graph()
    pos = nx.spring_layout(karate_g)
    results = [alg(karate_g) for alg in algorithms]
    partitions = [r[0] for r in results]

    metrics = [(coverage(karate_g, r[1]), 
                performance(karate_g, r[1]),
                modularity(karate_g, r[1]),)
                for r in results]

    runtimes = [r[2] for r in results]

    for idx in range(len(metrics)):
        RESULTS_S[NAMES[idx]]["Karate"] = {
                "coverage": metrics[idx][0],
                "performance": metrics[idx][1],
                "modularity": metrics[idx][2],
                "runtime": runtimes[idx],
            }

        if VERBOSE:
            print(
                f"The coverage obtained by {algorithms[idx].__name__} on the Karate Club graph was " +
                "%.4f" % metrics[idx][0])
            print(
                f"The performance obtained by {algorithms[idx].__name__} on the Karate Club graph was " +
                "%.4f" % metrics[idx][1])
            print(
                f"The final modularity obtained by {algorithms[idx].__name__} on the Karate Club graph was " +
                "%.4f" % metrics[idx][2])
            print("========================================================")


    parallel_display(algorithms, partitions, karate_g, pos)

    # simple Caveman graph 4 6
    caveman_g = generate_caveman_graph(cliques=4, size=6)
    pos = nx.spring_layout(caveman_g)
    results = [alg(caveman_g) for alg in algorithms]
    partitions = [r[0] for r in results]

    metrics = [(coverage(caveman_g, r[1]), 
                performance(caveman_g, r[1]),
                modularity(caveman_g, r[1]),)
                for r in results]

    runtimes = [r[2] for r in results]

    for idx in range(len(metrics)):
        RESULTS_S[NAMES[idx]]["Caveman46"] = {
                "coverage": metrics[idx][0],
                "performance": metrics[idx][1],
                "modularity": metrics[idx][2],
                "runtime": runtimes[idx],
            }
        if VERBOSE:
            print(
                f"The coverage obtained by {algorithms[idx].__name__} on the Caveman46 graph was " +
                "%.4f" % metrics[idx][0])
            print(
                f"The performance obtained by {algorithms[idx].__name__} on the Caveman46 graph was " +
                "%.4f" % metrics[idx][1])
            print(
                f"The final modularity obtained by {algorithms[idx].__name__} on the Caveman46 graph was " +
                "%.4f" % metrics[idx][2])
            print("========================================================")

    parallel_display(algorithms, partitions, caveman_g, pos)

    # simple Caveman graph 7 3
    caveman_g = generate_caveman_graph(cliques=7, size=3)
    pos = nx.spring_layout(caveman_g)
    results = [alg(caveman_g) for alg in algorithms]
    partitions = [r[0] for r in results]

    metrics = [(coverage(caveman_g, r[1]),
                performance(caveman_g, r[1]),
                modularity(caveman_g, r[1]),)
               for r in results]

    runtimes = [r[2] for r in results]

    for idx in range(len(metrics)):
        RESULTS_S[NAMES[idx]]["Caveman73"] = {
                "coverage": metrics[idx][0],
                "performance": metrics[idx][1],
                "modularity": metrics[idx][2],
                "runtime": runtimes[idx],
            }
        if VERBOSE:
            print(
                f"The coverage obtained by {algorithms[idx].__name__} on the Caveman73 graph was " +
                "%.4f" % metrics[idx][0])
            print(
                f"The performance obtained by {algorithms[idx].__name__} on the Caveman73 graph was " +
                "%.4f" % metrics[idx][1])
            print(
                f"The final modularity obtained by {algorithms[idx].__name__} on the Caveman73 graph was " +
                "%.4f" % metrics[idx][2])
            print("========================================================")

    parallel_display(algorithms, partitions, caveman_g, pos)
Example #11
print()
print("gmc", gmc)
print("alc", alc)
print("lpac", lpac)
print("async_fluidc", asfl)
# print("girvan_newman",girvanNewmanCommunities)
print("FuzAg Communties")
for i in finalCommunities:
    print(i, end="-> ")
    for j in finalCommunities[i]:
        print(j, end=" ")
    print()

listOfFinalCommunities = []
for i in finalCommunities:
    listOfFinalCommunities.append(finalCommunities[i])
coverageOfFuzAg = coverage(Graph, listOfFinalCommunities)
print("coverage of FuzAg", coverageOfFuzAg)
print("coverage of greedy_modularity_communities", coverage(Graph, gmc))
print("coverage of async_lpa_communities", coverage(Graph, alc))
print("coverage of label_propagation_communities", coverage(Graph, lpac))
print("coverage of Async Fluid Communities", coverage(Graph, asfl))

performanceOfFuzAg = performance(Graph, listOfFinalCommunities)
print("performance of FuzAg", performanceOfFuzAg)
print("performance of greedy_modularity_communities", performance(Graph, gmc))
print("performance of async_lpa_communities", performance(Graph, alc))
print("performance of label_propagation_communities", performance(Graph, lpac))
print("performance of Async Fluid Communities", performance(Graph, asfl))
Example #12
    def test_bad_partition(self):
        """Tests that a poor partition has a low performance measure.

        Of the 15 node pairs, only 3 intra-community pairs are edges and
        only 5 of the 9 cross-community pairs are non-edges, giving
        (3 + 5) / 15 = 8 / 15.
        """
        G = barbell_graph(3, 0)
        partition = [{0, 1, 4}, {2, 3, 5}]
        assert_almost_equal(8 / 15, performance(G, partition))
Example #13
import networkx as nx
from networkx.algorithms.community import quality


def export_gml(G, communities, path):
    # get a copy of the original graph
    G_copy = G.copy()

    # build node group dictionary
    node_group = {}
    for com_num, community in enumerate(communities):
        for v in community:
            node_group[v] = {'community': com_num}

    # set node group as G_copy's node attribute
    nx.set_node_attributes(G_copy, node_group)

    # export G_copy to .gml file
    nx.write_gml(G_copy, path)
    print("Already export gml file!")

G = nx.readwrite.gml.read_gml("weighted_graph.gml")

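# NOTE: girvan_newman, label_propagation and markov_cluster below appear to be
# project-local helpers (note their extra arguments), not the NetworkX
# functions of the same names.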
communities_gn = girvan_newman(G, 6)
communities_lp = label_propagation(G)
communities_mc = markov_cluster(G, power=2, inflation=2, numIter=5, decimals=2)

print("gn:", len(communities_gn), quality.modularity(G, communities_gn), quality.performance(G, communities_gn))
print("lp:", len(communities_lp), quality.modularity(G, communities_lp), quality.performance(G, communities_lp))
print("mc:", len(communities_mc), quality.modularity(G, communities_mc), quality.performance(G, communities_mc))

export_gml(G, communities_lp, "test_lp.gml")