Exemplo n.º 1
0
    def modularity_density(self):
        """
        Compute the modularity density of this object's communities on its attached graph.

        Modularity density is one of several proposals designed to mitigate the resolution limit of modularity-based measures.
        The idea is to include information about community size in the expected density of a community, so that small and dense communities are not neglected.
        For each community :math:`C` in partition :math:`S`, it uses the average modularity degree :math:`d(C) = d^{int(C)} - d^{ext(C)}`, where :math:`d^{int(C)}` and :math:`d^{ext(C)}` are the average internal and external degrees of :math:`C` respectively, to evaluate the fitness of :math:`C` in its network.
        The modularity density is then defined as:

        .. math:: Q(S) = \\sum_{C \\in S} \\frac{1}{n_C} ( \\sum_{i \\in C} k^{int}_{iC} - \\sum_{i \\in C} k^{out}_{iC})

        where :math:`n_C` is the number of nodes in :math:`C`, :math:`k^{int}_{iC}` is the degree of node i within :math:`C` and :math:`k^{out}_{iC}` is the degree of node i outside :math:`C`.


        :return: the modularity density score
        :raises ValueError: if the internal graph check fails (no graph instance specified)

        :Example:

        >>> import networkx as nx
        >>> from cdlib.algorithms import louvain
        >>> g = nx.karate_club_graph()
        >>> communities = louvain(g)
        >>> mod = communities.modularity_density()

        :References:

        Li, Z., Zhang, S., Wang, R. S., Zhang, X. S., & Chen, L. (2008). **Quantitative function for community detection.** Physical review E, 77(3), 036109.

        """

        # Guard clause: the score cannot be computed without a graph.
        if not self.__check_graph():
            raise ValueError("Graph instance not specified")

        return evaluation.modularity_density(self.graph, self)
Exemplo n.º 2
0
    def test_modularity(self):
        """Check the modularity-family scores of a Louvain clustering of the karate club graph."""
        graph = nx.karate_club_graph()
        partition = louvain(graph)

        # Newman-Girvan and Erdos-Renyi modularity are checked against the same range.
        for metric_name in ("newman_girvan_modularity", "erdos_renyi_modularity"):
            result = getattr(evaluation, metric_name)(graph, partition)
            self.assertLessEqual(result.score, 1)
            self.assertGreaterEqual(result.score, -0.5)

        # Modularity density is only checked for its type, not for a range.
        density = evaluation.modularity_density(graph, partition)
        self.assertIsInstance(density.score, float)

        # z-modularity is checked against an upper bound of sqrt(number of nodes).
        z_result = evaluation.z_modularity(graph, partition)
        upper_bound = np.sqrt(graph.number_of_nodes())
        self.assertLessEqual(z_result.score, upper_bound)
        self.assertGreaterEqual(z_result.score, -0.5)
Exemplo n.º 3
0
def _safe_metric(name, *args, first_item=False, **kwargs):
    """Evaluate ``evaluation.<name>(*args, **kwargs)`` and return its score, or NaN on failure.

    :param name: name of the function to look up on the ``evaluation`` module
    :param args: positional arguments forwarded to the metric
    :param first_item: when True, read the score as ``result[0]`` instead of ``result.score``
    :param kwargs: keyword arguments forwarded to the metric
    :return: the metric score, or ``np.nan`` if the lookup or the evaluation raised
    """
    try:
        # The attribute lookup stays inside the try so that a metric missing
        # from ``evaluation`` yields NaN as well.
        result = getattr(evaluation, name)(*args, **kwargs)
        return result[0] if first_item else result.score
    except Exception:
        # Best-effort on purpose: one failing metric must not abort the run.
        # ``Exception`` (rather than a bare ``except:``) lets KeyboardInterrupt
        # and SystemExit propagate so a long run can still be interrupted.
        return np.nan


def getAllScoresDict(g, _reference, _communities, executionTime):
    """Collect evaluation scores for a clustering into a flat dictionary.

    Both clusterings are deep-copied and their ``communities`` attribute is
    replaced by ``complete_partition(..., g, mode='new_cluster')`` before any
    metric is computed, so the objects passed in by the caller are not modified.
    (presumably this puts nodes missing from the clustering into new clusters;
    verify against ``complete_partition``.)

    Every metric is computed independently; a metric that raises is stored as
    ``np.nan`` instead of aborting the whole evaluation.

    :param g: graph the clusterings refer to
    :param _reference: reference clustering
    :param _communities: clustering to evaluate
    :param executionTime: value stored unchanged under the ``'time'`` key
    :return: dict mapping ``'time'`` and each metric name to its value
    """
    reference = copy.deepcopy(_reference)
    reference.communities = complete_partition(reference.communities, g, mode='new_cluster')
    communities = copy.deepcopy(_communities)
    communities.communities = complete_partition(communities.communities, g, mode='new_cluster')

    scores = {'time': executionTime}

    # Comparison scores between the reference and the evaluated clustering.
    # Not computed here (disabled in the original code): adjusted_mutual_information,
    # adjusted_rand_index, normalized_mutual_information,
    # overlapping_normalized_mutual_information_MGH, variation_of_information.
    for name in ('f1', 'nf1', 'omega'):
        scores[name] = _safe_metric(name, reference, communities)
    # This result is indexed with [0] rather than read through ``.score``.
    scores['overlapping_normalized_mutual_information_LFK'] = _safe_metric(
        'overlapping_normalized_mutual_information_LFK',
        reference, communities, first_item=True,
    )

    # Fitness scores of the evaluated clustering on the graph, summarised over communities.
    # Not computed here (disabled in the original code): avg_distance, avg_transitivity,
    # hub_dominance, max_odf, avg_odf, flake_odf, triangle_participation_ratio, purity.
    summarised_metrics = (
        'avg_embeddedness',
        'average_internal_degree',
        'conductance',
        'cut_ratio',
        'edges_inside',
        'expansion',
        'fraction_over_median_degree',
        'internal_edge_density',
        'normalized_cut',
        'scaled_density',
    )
    for name in summarised_metrics:
        scores[name] = _safe_metric(name, g, communities, summary=True)

    # The remaining metrics keep their original insertion order; the ones
    # called without ``summary`` are those the original called that way.
    scores['significance'] = _safe_metric('significance', g, communities)
    scores['size'] = _safe_metric('size', g, communities, summary=True)
    scores['surprise'] = _safe_metric('surprise', g, communities)
    scores['modularity_density'] = _safe_metric('modularity_density', g, communities)

    return scores
Exemplo n.º 4
0
    df = pd.DataFrame(forDf)
    df.to_csv('cdlib_results_legacy_part.csv')

    graphStatistics = {'graph': [], 'statistic': [], 'value':[]}
    statistics = {
        '$|V|$': lambda graph, reference: len(graph.nodes),
        '$|E|$': lambda graph, reference: len(graph.edges),
        '$\\hat{k}$': lambda graph, reference: len(reference.communities),
        '$\\hat{o}$': lambda graph, reference: getOverlappingNumber(reference.communities),
        '$\\hat{c}$': lambda graph, reference: nx.average_clustering(graph),
        '\\shortstack[l]{normalized \\\\ cut}': lambda graph, reference: evaluation.normalized_cut(graph,reference, summary=True).score,
        '\\shortstack[l]{internal \\\\ edge \\\\density}': lambda graph, reference: evaluation.internal_edge_density(graph,reference, summary=True).score,
        'significance': lambda graph, reference: evaluation.significance(graph,reference, summary=True).score,
        '\\shortstack[l]{avarage \\\\ internal \\\\degree}': lambda graph, reference: evaluation.average_internal_degree(graph,reference, summary=True).score,
        '\\shortstack[l]{modularity \\\\ density}': lambda graph, reference: evaluation.modularity_density(graph,reference).score,
    }

    for graph, name, reference in zip(graphs, names, references):
        for statistic_name, statistic in statistics.items():
            graphStatistics['graph'].append(name)
            graphStatistics['statistic'].append(statistic_name)
            graphStatistics['value'].append(statistic(graph, reference) )

    graphStatisticsDF = pd.DataFrame(graphStatistics)
    graphStatisticsDF.to_csv('../Results/graphs_stats_legacy.csv')

    # methods that were not used, or that do not work for some reason

    # wrong implementation of amplified commute distance
    # results_scoring['lpam_amp'] = getResultsParallel(
Exemplo n.º 5
0
# Show the nodes and edges of the graph before pruning.
print(G.nodes())
print(G.edges())

# Remove self-loops, then keep only the 2-core of the graph.
# (networkx does not define the k-core for graphs with self-loops, so they
# are dropped first.)
G.remove_edges_from(nx.selfloop_edges(G))
G = nx.k_core(G,k=2)

# Detect communities with Infomap (the Louvain alternative is left commented
# out) and plot the result.
#coms = algorithms.louvain(G)
coms = algorithms.infomap(G)
# NOTE(review): pos is not passed to either plot call below (both use
# position=None / no position) -- it may be unused in this part of the file.
pos = nx.spring_layout(G)
viz.plot_community_graph(G, coms, figsize=(8, 8), node_size=200, plot_overlaps=False, plot_labels=True, cmap=None, top_k=None, min_size=None)
viz.plot_network_clusters(G, coms, position=None, figsize=(8, 8), node_size=200, plot_overlaps=False, plot_labels=False, cmap=None, top_k=None, min_size=None)

# Modularity density of the detected communities on the pruned graph.
mod = evaluation.modularity_density(G,coms)
print(mod)

# NOTE(review): same call as above with the same arguments; the result is
# computed and printed a second time.
mod = evaluation.modularity_density(G,coms)
print(mod)

# Purity calculation (disabled).
#communities = eva(G, coms)
#pur = evaluation.purity(communities)
#print(pur)

# Average embeddedness of the detected communities.
ave = evaluation.avg_embeddedness(G,coms)
print(ave)