Example No. 1
 def test_bad_partition(self):
     """Tests that a poor partition has a low coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert 3 / 7 == pytest.approx(coverage(G, partition), abs=1e-7)
     assert 3 / 7 == pytest.approx(partition_quality(G, partition)[0],
                                   abs=1e-7)
Example No. 2
 def test_good_partition(self):
     """Tests that a good partition has a high coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 2}, {3, 4, 5}]
     assert 6 / 7 == pytest.approx(coverage(G, partition), abs=1e-7)
     assert 6 / 7 == pytest.approx(partition_quality(G, partition)[0],
                                   abs=1e-7)
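The asserted values follow from the structure of barbell_graph(3, 0): two triangles {0, 1, 2} and {3, 4, 5} joined by the single bridge edge (2, 3), so the graph has 7 edges in total. A minimal standalone check of both assertions, outside the test class:

import networkx as nx
from networkx.algorithms.community import partition_quality

G = nx.barbell_graph(3, 0)
good = [{0, 1, 2}, {3, 4, 5}]  # only the bridge (2, 3) crosses communities -> coverage 6/7
bad = [{0, 1, 4}, {2, 3, 5}]   # only (0, 1), (2, 3) and (3, 5) are intra-community -> coverage 3/7

print(partition_quality(G, good)[0])  # 0.8571...
print(partition_quality(G, bad)[0])   # 0.4285...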
Example No. 3
def communitiesRandomModule(graphForCommunities):
    # function for random analysis
    # variables
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(
        greedy_modularity_communities(graphForCommunities))
    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    # end of function
    return (networkModularity, len(listCommunities), networkCoverage,
            networkPerformance)
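For reference, the dict bookkeeping above exists only because python-louvain's louv.modularity expects a node-to-community mapping; NetworkX can compute modularity, coverage, and performance straight from the list of node sets. A minimal equivalent sketch (partition_quality replaced the separate coverage/performance functions in NetworkX 3.0):

from networkx.algorithms.community import greedy_modularity_communities, modularity, partition_quality

communities = [set(c) for c in greedy_modularity_communities(graphForCommunities)]
networkModularity = modularity(graphForCommunities, communities)  # same partition, no dict needed
networkCoverage, networkPerformance = partition_quality(graphForCommunities, communities)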
Example No. 4
    def increase_threshold(self):

        size_of_cluster1_list = []
        num_of_cluster_list = []
        measure_list = []

        for alpha in self.alphas:

            causal_matrix = np.where(self.vs_corr_gene > alpha, 1, 0)
            G = nx.Graph(causal_matrix)
            group_class = list(greedy_modularity_communities(G))

            measure = coverage(G, group_class)
            num_of_cluster = len(group_class)
            indices_in_cluster1 = list(group_class[0])
            size_of_cluster1 = len(indices_in_cluster1)

            print(num_of_cluster)
            print(size_of_cluster1)

            size_of_cluster1_list.append(size_of_cluster1)
            num_of_cluster_list.append(num_of_cluster)
            measure_list.append(measure)

        plt.figure(figsize=(10, 10))
        plt.plot(self.alphas,
                 size_of_cluster1_list,
                 linestyle='-',
                 marker='o',
                 color='b')
        plt.xlabel(r'$\tau$')
        plt.ylabel('Size of the largest community')
        plt.savefig("/home/liz3/Desktop/size_of_leading_community")
        plt.close()

        plt.figure(figsize=(10, 10))
        plt.plot(self.alphas,
                 num_of_cluster_list,
                 linestyle='-',
                 marker='o',
                 color='b')
        plt.xlabel(r'$\tau$')
        plt.ylabel('Number of communities')
        plt.savefig("/home/liz3/Desktop/number_of_community")
        plt.close()

        plt.figure(figsize=(10, 10))
        plt.plot(self.alphas,
                 measure_list,
                 linestyle='-',
                 marker='o',
                 color='b')
        plt.xlabel(r'$\tau$')
        plt.ylabel('Coverage')
        plt.savefig("/home/liz3/Desktop/measure_of_partition")
        plt.close()
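A self-contained variant of the same threshold sweep, with a small random symmetric matrix standing in for self.vs_corr_gene (hypothetical data, only so the sketch runs on its own):

import numpy as np
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities, partition_quality

rng = np.random.default_rng(0)
corr = rng.random((20, 20))
corr = (corr + corr.T) / 2     # symmetrise so the thresholded graph is undirected
np.fill_diagonal(corr, 0)      # ignore self-correlations

for alpha in (0.3, 0.5, 0.7):  # stand-in for self.alphas
    adjacency = np.where(corr > alpha, 1, 0)
    G = nx.Graph(adjacency)                     # graph from the 0/1 adjacency matrix
    communities = list(greedy_modularity_communities(G))
    cov = partition_quality(G, communities)[0]  # coverage of the CNM partition
    print(alpha, len(communities), len(communities[0]), cov)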
Example No. 5
    def community_detection(self):
        """
        Detects characteristics related to communities of graph and writes them 
        down to the 'Communities.txt' file. It also compares these characteristics
        with a random graph of the same node-size and edge-size.
        """
        partitionx = communityx.greedy_modularity_communities(self.G)
        """Modularity & Coverage"""
        modularity = community.modularity(self.best_parts, self.G)  #XXX
        coverage = communityx.coverage(self.G, partitionx)
        """in the corresponding random graph"""
        # H = nx.gnm_random_graph(self.G.number_of_nodes(),self.G.number_of_edges())
        H = nx.configuration_model([d for v, d in self.G.degree()])

        part = community.best_partition(H)  #XXX
        part2 = communityx.greedy_modularity_communities(H)
        modularity_rand = community.modularity(part, H)
        coverage_rand = communityx.coverage(H, part2)
        """Write File"""
        title = 'Communities.txt'
        com_file = open(self.path + title, 'w')
        com_file.write('Modularity:' + '\n')
        com_file.write(str(modularity) + '\n')
        com_file.write('Coverage' + '\n')
        com_file.write(str(coverage) + '\n')
        com_file.write('The corresponding random graph has modularity:' + '\n')
        com_file.write(str(modularity_rand) + '\n')
        com_file.write('The corresponding random graph has coverage:' + '\n')
        com_file.write(str(coverage_rand))
        com_file.write('\n')
        com_file.write('number of communities:' + '\n')
        com_file.write(str(max(self.best_parts.values()) + 1) + '\n')  #XXX
        # com_file.write(str(max(self.best_parts_x.values())+1)+'\n')
        com_file.write('\n')
        com_file.write(
            'The coverage of a partition is the ratio of the number of intra-community edges to the total number of edges in the graph.'
        )
        com_file.close()
        return modularity, coverage, modularity_rand, coverage_rand
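One caveat for the random baseline above: nx.configuration_model returns a MultiGraph that can contain self-loops and parallel edges, and both the Louvain partition and the coverage value are then computed on that multigraph. A hedged variant that first collapses it to a simple graph (a common simplification, not what the original code does):

H = nx.configuration_model([d for v, d in self.G.degree()])
H = nx.Graph(H)                             # collapse parallel edges
H.remove_edges_from(nx.selfloop_edges(H))   # drop self-loops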
Example No. 6
def AnalizeCommunitiesAndMakeDrawings(name, fileName, someNetwork):
    # function message
    print("\t- Obtaining communities (for undir network version)...")
    # variables
    communitiesFinalResult = ""
    communitiesResult = "Communities (Undirected):\n"
    graphForCommunities = nx.Graph()
    read = "ok"
    com = 0
    communitiesListCNM = []
    listCommunities = []
    communitiesDict = dict()
    community = None
    vertex = None
    networkModularity = 0
    networkCoverage = 0
    networkPerformance = 0
    counter = 0
    # get undirected, integer-weighted graph
    (graphForCommunities, read) = ParseFileToNetwork(fileName, "-u",
                                                     "community")
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(
        greedy_modularity_communities(graphForCommunities))
    # evaluate modularity
    for community in communitiesListCNM:
        for vertex in set(community):
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
        com = com + 1
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    communitiesResult = communitiesResult + "- Modularity: " + str(
        networkModularity) + "\n"
    communitiesResult = communitiesResult + "- Coverage: " + str(
        networkCoverage) + "\n"
    communitiesResult = communitiesResult + "- Performance: " + str(
        networkPerformance) + "\n"
    communitiesResult = communitiesResult + "- Number of Communities: " + str(
        len(listCommunities)) + "\n"
    for counter in range(len(listCommunities)):
        communitiesResult = communitiesResult + "- Number of nodes in community " + str(
            counter + 1) + ": " + str(len(listCommunities[counter])) + "\n"
        communitiesResult = communitiesResult + "- Nodes in C_" + str(
            counter + 1) + ": " + ",".join(listCommunities[counter]) + "\n"
    # plot graph with communities
    drawCommunities(someNetwork, listCommunities, name)
    # end of function
    communitiesFinalResult = "\n\n" + communitiesResult + "\n\n"
    return (communitiesFinalResult)
Example No. 7
def evaluate_coverage(G, commu_list):
    
    # initiate a dictionary to store coverages
    coverages = {}
    
    # loop over all community structure
    for c in commu_list:
        communities = c['communities']
        # prepare element for coverage calculation
        commu = []
        # loop over communities in an algorithm
        for i in sorted(pd.Series(communities.values()).unique()):
            store_list = []
            # loop over all nodes
            for node in G.nodes:
                if communities[node] == i:
                    store_list.append(node)
            commu.append(store_list)
        # calculate coverage
        coverage = nx_comm.coverage(G, commu)
        # store value in `coverages`
        coverages[c['algo']] = coverage
    
    return coverages
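A hypothetical usage sketch for the function above: commu_list is assumed to hold one entry per algorithm, each a dict with an 'algo' name and a node-to-label mapping under 'communities' (these key names come from the snippet itself, not from any library):

import networkx as nx

G = nx.karate_club_graph()
labels = {node: 0 if node < 17 else 1 for node in G.nodes}  # toy two-way split by node id
commu_list = [{'algo': 'toy_split', 'communities': labels}]
print(evaluate_coverage(G, commu_list))  # {'toy_split': <coverage of the split>}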
Example No. 8
 def test_good_partition(self):
     """Tests that a good partition has a high coverage measure."""
     G = barbell_graph(3, 0)
     partition = [set([0, 1, 2]), set([3, 4, 5])]
     assert_almost_equal(6 / 7, coverage(G, partition))
Example No. 9
 def test_bad_partition(self):
     """Tests that a poor partition has a low coverage measure."""
     G = barbell_graph(3, 0)
     partition = [set([0, 1, 4]), set([2, 3, 5])]
     assert_almost_equal(3 / 7, coverage(G, partition))
Example No. 10
 def test_good_partition(self):
     """TestData that a good partition has a high coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 2}, {3, 4, 5}]
     assert almost_equal(6 / 7, coverage(G, partition))
Example No. 11
 def test_bad_partition(self):
     """TestData that a poor partition has a low coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert almost_equal(3 / 7, coverage(G, partition))
Example No. 12
def coverage_wrapper(partition, G):
    community_map = extract_community_map(partition)
    return algorithms.coverage(G, community_map)
Example No. 13
    def get_quality_metrics(self, station_df, lst_graphs):

        import dunn as di
        from sklearn import metrics
        import networkx as nx
        import networkx.algorithms.community as nx_comm
        import numpy as np
        import pandas as pd
        import traceback

        quality_metric_df = pd.DataFrame([])

        try:
            #d            _n_num_clust = len(station_df['label'].unique())     # Generated Cluster Count
            _n_num_clust = len([
                x for x in station_df['label'].unique() if x > -1
            ])  # Generated Cluster Count
            if _n_num_clust <= 1:
                raise ValueError(
                    'Cannot compute quality metric for %d clusters' %
                    (_n_num_clust))
            ''' returns the simple graph of the clusters and the set dictionary of cluster nodes '''
            G_simple_, l_G_clusters_ = self.__get_graph_n_labels(station_df)

            _s_st_types = str(station_df['st_type'].unique())  # Station Types
            _n_tot_num_st = station_df.shape[0]  # Station Quantity
            _f_min_dist = self._max_distance  # Minimum Distance
            _n_min_pts = self._minimum_samples  # Minimum Points
            _s_clust = str(self._name)  # Clustering Name
            _s_algo = str(self._algorithm)  # Algorithm
            _s_metric = str(self._metric)  # Metric
            _s_method = str(self._cluster_method)  # Method
            _s_seed = str(self._seed)  # Seed
            __lst_valid_cloud_clust = [
                frozenset(clust) for clust in l_G_clusters_
                if len(clust) >= self._minimum_samples
            ]
            _n_valid_clust = len(
                __lst_valid_cloud_clust)  # Valid Cluster Count

            # Clustered Station Count
            _n_sts_in_clusters = 0
            for x in __lst_valid_cloud_clust:
                _n_sts_in_clusters += len(x)

            _n_noise = station_df.shape[
                0] - _n_sts_in_clusters  # Unclustered Noise Count
            _n_avg_deg = sum([
                d for n, d in G_simple_.degree()
                if G_simple_.nodes[n]["label"] > -1
            ]) / _n_sts_in_clusters  # Average Node Degree
            ''' Compute the accuracy of r-regularity constraint on the individual clusters by considering the
                systematic error that is a reproducible inaccuracy consistent for the same clustering strategy.
                For such we apply the weighted mean absolute error to estimate the deviation from the expected degree.
            '''
            sum_deg_abs_err = 0
            _deg_wmae = 0
            _deg_err_st_count = 0
            #p            print("\nclusters:",len(lst_graphs))
            for H in lst_graphs:
                H = nx.Graph(H)
                H.remove_nodes_from(list(nx.isolates(H)))
                H.remove_nodes_from(
                    [n for n, v in H.nodes(data=True) if v["label"] == -1])
                H_deg_abs_err = 0
                _l_deg_diff = []
                if H.number_of_nodes() > 0:
                    _l_deg_diff = [
                        _n_min_pts - 1 - d for n, d in H.degree()
                        if (int(d) < int(_n_min_pts -
                                         1) and H.nodes[n]["label"] > -1)
                    ]
                if len(_l_deg_diff) > 0:
                    #p                    print("\ndegree mean absolute error")
                    #p                    print("minPts:",_n_min_pts)
                    #p                    print("list deg diff:",_l_deg_diff)
                    #p                    print("graph nodes:",sorted([d for n,d in H.degree()]))
                    sum_deg_abs_err += sum(_l_deg_diff)
                    _deg_err_st_count += len(_l_deg_diff)
            if _deg_err_st_count > 0:
                _deg_wmae = sum_deg_abs_err / (_deg_err_st_count *
                                               (_n_min_pts - 1))
#p                print("_deg_wmae", _deg_wmae,_deg_err_st_count)
            ''' prepare valid stations for measuring the quality'''
            lst_st = list(nx.get_node_attributes(G_simple_, 'pos').values())
            lst_lbl = list(nx.get_node_attributes(G_simple_, 'label').values())

            _f_silhouette = metrics.silhouette_score(
                lst_st, lst_lbl, metric='haversine')  # Silhouette Coefficient
            _f_cal_har = metrics.calinski_harabasz_score(
                lst_st, lst_lbl)  # Calinski Harabaz score
            _f_dav_bould = metrics.davies_bouldin_score(
                lst_st, lst_lbl)  # Davies Bouldin score
            _f_dunn = di.dunn_fast(lst_st, lst_lbl)  # Dunn Index
            _f_modul = nx_comm.modularity(G_simple_,
                                          l_G_clusters_)  # Modularity

            try:
                l_conductance = list(
                    nx.conductance(G_simple_, cluster_i, weight='distance')
                    for cluster_i in __lst_valid_cloud_clust)
                _f_conduct = sum(l_conductance) / len(
                    l_conductance)  # Conductance Average
            except Exception:
                _f_conduct = 0
            _f_cover = nx_comm.coverage(G_simple_,
                                        l_G_clusters_)  # Coverage Score
            _f_perform = nx_comm.performance(
                G_simple_, l_G_clusters_)  # Performance Score

            dict_quality_mesrs = {
                'Station Types': _s_st_types,
                'Station Quantity': _n_tot_num_st,
                'Maximum Distance': _f_min_dist,
                'Minimum Points': _n_min_pts,
                'Name': _s_clust,
                'Algorithm': _s_algo,
                'Metric': _s_metric,
                'Method': _s_method,
                'Seed': _s_seed,
                'Generated Cluster Count': _n_num_clust,
                'Valid Cluster Count': _n_valid_clust,
                'Clustered Station Count': _n_sts_in_clusters,
                'Unclustered Noise Count': _n_noise,
                'Average Station Degree': _n_avg_deg,
                'Degree Weighted Mean Absolute Error': _deg_wmae,
                'Degree Error Station Count': _deg_err_st_count,
                'Silhouette Coefficient': _f_silhouette,
                'Calinski Harabaz score': _f_cal_har,
                'Davies Bouldin score': _f_dav_bould,
                'Dunn Index': _f_dunn,
                'Modularity': _f_modul,
                'Conductance Average': _f_conduct,
                'Coverage Score': _f_cover,
                'Performance Score': _f_perform,
            }
            #            print('Dict qual',dict_quality_mesrs('Seed'))
            quality_metric_df = pd.DataFrame(dict_quality_mesrs,
                                             index=[_s_clust])
            quality_metric_df.reset_index(drop=True, inplace=True)

        except Exception as err:
            print(
                "Class cluster_quality_metric [get_quality_metrics] Error message:",
                err)
            #            print(G_simple_.edges('distance'))
            print(traceback.format_exc())

        return quality_metric_df
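Compatibility note for the metric block above: nx_comm.coverage and nx_comm.performance were removed in NetworkX 3.0; nx_comm.partition_quality returns both values in one call. A minimal sketch of the replacement, with karate_club_graph standing in for G_simple_ and its CNM communities for l_G_clusters_:

import networkx as nx
import networkx.algorithms.community as nx_comm

G = nx.karate_club_graph()                            # stand-in for G_simple_
clusters = nx_comm.greedy_modularity_communities(G)   # stand-in for l_G_clusters_
_f_cover, _f_perform = nx_comm.partition_quality(G, clusters)  # (Coverage Score, Performance Score)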
Example No. 14
 def test_good_partition(self):
     """Tests that a good partition has a high coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 2}, {3, 4, 5}]
     assert_almost_equal(6 / 7, coverage(G, partition))
Example No. 15
 def test_bad_partition(self):
     """Tests that a poor partition has a low coverage measure."""
     G = barbell_graph(3, 0)
     partition = [{0, 1, 4}, {2, 3, 5}]
     assert_almost_equal(3 / 7, coverage(G, partition))
Example No. 16

# Plot the partitioned graph
plt.figure()
node_poses = nx.spring_layout(G_parted)
nx.draw_networkx_nodes(G_parted, node_poses, node_size=50)
nx.draw_networkx_labels(G_parted, node_poses, labels=bundlename_ref_parted_str)
#nx.draw_networkx_edges(G_partitioned, node_poses)

conn_comps_G_parted = [G_parted.subgraph(c).copy() for c in nx.connected_components(G_parted)]

start = time.time()
c = list(community.greedy_modularity_communities(G))
end = time.time()
total_time = end - start
G_coverage = community.coverage(G, c)

plt.figure()
for i in range(len(c)):
    sub_G = G_nearest.subgraph(c[i])
    node_poses_nearest = nx.spring_layout(sub_G)
    nx.draw_networkx_nodes(sub_G, node_poses_nearest, node_size=50)
    nx.draw_networkx_labels(sub_G, node_poses_nearest, labels=cardname_ref)
    plt.show()

plt.figure()
plt.imshow(conn_matrix)
plt.show()

plt.figure()
plt.imshow(nearest_neighbour_matrix)