Example #1
    def compute_features(self):

        wiener_index = lambda graph: nx.wiener_index(graph)
        self.add_feature(
            "wiener index",
            wiener_index,
            "The wiener index is defined as the sum of the lengths of the shortest paths between all pairs of vertices",
            InterpretabilityScore(4),
        )

        estrada_index = lambda graph: nx.estrada_index(graph)
        self.add_feature(
            "estrada_index",
            estrada_index,
            "The Estrada Index is a topological index of protein folding or 3D compactness",
            InterpretabilityScore(4),
        )
Example #2
def do_something(root, tree, total_graph, parent, health_values, file_diffs, commit_count, actor_count,
                 closeness_values, edge_tolerance=0.5, init_time_val=5, deprecation_val=1):
    update_actors(tree, total_graph, root)
    date = tree.nodes[root]['date']
    date = time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ"))
    if parent:
        update_edge(total_graph, root, parent, tree, file_diffs, edge_tolerance=edge_tolerance,
                    init_time_val=init_time_val, deprecation_val=deprecation_val)
    for node in total_graph.nodes():
        importance = 0.
        for edge in total_graph[node]:
            importance += total_graph[node][edge]['weight']
        total_graph.nodes[node]['importance'][date] = importance
    health_values.append((date, nx.estrada_index(total_graph)))
    closeness_values.append((date, np.mean(list(nx.closeness_centrality(total_graph).values()))))
    actor_count.append((date, len(total_graph)))
    if len(commit_count) > 0:
        commit_count.append((date, commit_count[-1][1] + 1))
    else:
        commit_count.append((date, 1))
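Both examples above treat nx.estrada_index as a black box; as a quick cross-check (not part of either example), the value can be reproduced from the adjacency spectrum, since EE(G) = sum_i exp(lambda_i):

import networkx as nx
import numpy as np

# Sanity check: the Estrada index equals the sum of exp(eigenvalue)
# over the eigenvalues of the adjacency matrix.
G = nx.karate_club_graph()
eigvals = np.linalg.eigvalsh(nx.to_numpy_array(G))
assert np.isclose(np.exp(eigvals).sum(), nx.estrada_index(G))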
Example #3
def compute_summaries(G):
    """ Compute network features, computational times and their nature.
    
    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).
        
        Args:
            G (networkx.classes.graph.Graph):
                an undirected networkx graph.
        
        Returns:
            resDicts (tuple): 
                a tuple containing the elements:
                - dictSums (dict): a dictionary with the name of the summaries
                as keys and the summary statistic values as values;
                - dictTimes (dict): a dictionary with the name of the summaries
                as keys and the time to compute each one as values;
                - dictIsDisc (dict): a dictionary indicating whether each summary is 
                discrete (True) or not (False).
                
    """

    dictSums = dict()  # Will store the summary statistic values
    dictTimes = dict()  # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
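    # (note: ss.entropy normalizes its input, so this is the entropy of the
    # degree sequence treated as a probability vector, not of the degree
    # histogram)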
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_mean"] = False

    # Median degree
    start = time.time()
    dictSums["degree_median"] = np.median(degree_vals)
    dictTimes["degree_median"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_median"] = False

    # Standard deviation of the degree distribution
    start = time.time()
    dictSums["degree_std"] = np.std(degree_vals)
    dictTimes["degree_std"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_std"] = False

    # Quantile 25%
    start = time.time()
    dictSums["degree_q025"] = np.quantile(degree_vals, 0.25)
    dictTimes["degree_q025"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q025"] = False

    # Quantile 75%
    start = time.time()
    dictSums["degree_q075"] = np.quantile(degree_vals, 0.75)
    dictTimes["degree_q075"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_q075"] = False

    # Average geodesic distance
    start = time.time()
    dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length(
        G_lcc)
    dictTimes["avg_shortest_path_length_LCC"] = time.time() - start
    dictIsDisc["avg_shortest_path_length_LCC"] = False

    # Average global efficiency:
    # The efficiency of a pair of nodes in a graph is the multiplicative
    # inverse of the shortest path distance between the nodes.
    # The average global efficiency of a graph is the average efficiency of
    # all pairs of nodes.
    start = time.time()
    dictSums["avg_global_efficiency"] = nx.global_efficiency(G)
    dictTimes["avg_global_efficiency"] = time.time() - start
    dictIsDisc["avg_global_efficiency"] = False

    # Harmonic mean of the geodesic distances, i.e. 1/avg_global_efficiency
    start = time.time()
    dictSums["harmonic_mean"] = 1. / nx.global_efficiency(G)
    dictTimes["harmonic_mean"] = time.time() - start
    dictIsDisc["harmonic_mean"] = False

    # Average local efficiency
    # The local efficiency of a node in the graph is the average global
    # efficiency of the subgraph induced by the neighbors of the node.
    # The average local efficiency is the average of the
    # local efficiencies of each node.
    start = time.time()
    dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc)
    dictTimes["avg_local_efficiency_LCC"] = time.time() - start
    dictIsDisc["avg_local_efficiency_LCC"] = False

    # Node connectivity
    # The node connectivity is equal to the minimum number of nodes that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc)
    dictTimes["node_connectivity_LCC"] = time.time() - start
    dictIsDisc["node_connectivity_LCC"] = True

    # Edge connectivity
    # The edge connectivity is equal to the minimum number of edges that
    # must be removed to disconnect G or render it trivial.
    # Only on the largest connected component here.
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with local clustering coefficient
    # defined as C_i = (nbr of pairs of neighbors of i that are connected)/(nbr of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.

    # We average it over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # We compute the median
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # We compute the standard deviation
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of 2-cores
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of 3-cores
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of 4-cores
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of 5-cores
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of 6-cores
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of k-shells
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the k+1-core

    # Number of 2-shells
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of 3-shells
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of 4-shells
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of 5-shells
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of 6-shells
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Approximate size of a maximum clique in the graph
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    listOfPLength = list(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # when k = 3
    start = time.time()
    n3Paths = 0
    # iterate over the (source, {target: length}) pairs directly, so the
    # code does not assume integer node labels
    for _, lengths in listOfPLength:
        n3Paths += list(lengths.values()).count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # when k = 4
    start = time.time()
    n4Paths = 0
    for _, lengths in listOfPLength:
        n4Paths += list(lengths.values()).count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # when k = 5
    start = time.time()
    n5Paths = 0
    for _, lengths in listOfPLength:
        n5Paths += list(lengths.values()).count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # when k = 6
    start = time.time()
    n6Paths = 0
    for _, lengths in listOfPLength:
        n6Paths += list(lengths.values()).count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # a subset of nodes such that every node not in the subset
    # has a direct neighbor inside the dominating set.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Same, but for the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(
        T)  # times 2 to have a number of nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected graphs,
    # only one orientation of the pair is counted.
    # (On LCC otherwise inf)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_{i,j} sigma(i,u,j)/sigma(i,j)
    # where sigma(i,j) is the number of shortest paths between i and j,
    # and sigma(i,u,j) the number of those passing through u

    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Averaged across nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum across nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u(B_max - B_u)/(N-1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time(
    ) - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i)
    # with n the number of nodes and lambda_i the i-th eigenvalue of the adjacency matrix of G
    start = time.time()
    dictSums["Estrada_index"] = nx.estrada_index(G)
    dictTimes["Estrada_index"] = time.time() - start
    dictIsDisc["Estrada_index"] = False

    # Eigenvector centrality
    # For each node, the centrality is proportional to the sum of its
    # neighbors' centralities: the centrality of node i is the i-th
    # coordinate of x such that Ax = lambda*x (for the largest eigenvalue)

    # Averaged
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0,1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution [0-50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli B(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
    dictIsDisc["noise_Bern"] = True

    # Noise simulated from a discrete uniform distribution on [0, 50)
    start = time.time()
    dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50)
    dictTimes["noise_disc_Unif"] = time.time() - start
    dictIsDisc["noise_disc_Unif"] = True

    resDicts = (dictSums, dictTimes, dictIsDisc)

    return resDicts
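A minimal driver for compute_summaries (a sketch; it assumes the module already imports time, numpy as np, scipy.stats as ss, networkx as nx, and networkx.algorithms.approximation as approximation, as the function body requires):

if __name__ == "__main__":
    # Any undirected graph works; a small Erdos-Renyi graph keeps the demo fast.
    G = nx.erdos_renyi_graph(50, 0.1, seed=0)
    dictSums, dictTimes, dictIsDisc = compute_summaries(G)
    print(dictSums["num_edges"], dictTimes["num_edges"], dictIsDisc["num_edges"])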
Example #4
    def centrality(self):
        result = {}
        result['degree_centrality'] = nx.degree_centrality(self.graph)

        if self.directed == 'directed':
            result['in_degree_centrality'] = nx.in_degree_centrality(
                self.graph)
            result['out_degree_centrality'] = nx.out_degree_centrality(
                self.graph)

        result['closeness_centrality'] = nx.closeness_centrality(self.graph)
        result['betweenness_centrality'] = nx.betweenness_centrality(
            self.graph)

        # Convert tuple keys to strings so the dict can be JSON-encoded
        stringify_temp = {}
        temp = nx.edge_betweenness_centrality(self.graph)
        for key in temp.keys():
            stringify_temp[str(key)] = temp[key]
        result['edge_betweenness_centrality'] = stringify_temp

        if self.directed == 'undirected':
            result[
                'current_flow_closeness_centrality'] = nx.current_flow_closeness_centrality(
                    self.graph)
            result[
                'current_flow_betweenness_centrality'] = nx.current_flow_betweenness_centrality(
                    self.graph)

            stringify_temp = {}
            temp = nx.edge_current_flow_betweenness_centrality(self.graph)
            for key in temp.keys():
                stringify_temp[str(key)] = temp[key]
            result['edge_current_flow_betweenness_centrality'] = stringify_temp

            result[
                'approximate_current_flow_betweenness_centrality'] = nx.approximate_current_flow_betweenness_centrality(
                    self.graph)
            result['eigenvector_centrality'] = nx.eigenvector_centrality(
                self.graph)
            result[
                'eigenvector_centrality_numpy'] = nx.eigenvector_centrality_numpy(
                    self.graph)
            result['katz_centrality'] = nx.katz_centrality(self.graph)
            result['katz_centrality_numpy'] = nx.katz_centrality_numpy(
                self.graph)
            result['communicability'] = nx.communicability(self.graph)
            result['communicability_exp'] = nx.communicability_exp(self.graph)
            result[
                'communicability_centrality'] = nx.communicability_centrality(
                    self.graph)
            result[
                'communicability_centrality_exp'] = nx.communicability_centrality_exp(
                    self.graph)
            result[
                'communicability_betweenness_centrality'] = nx.communicability_betweenness_centrality(
                    self.graph)
            result['estrada_index'] = nx.estrada_index(self.graph)

        result['load_centrality'] = nx.load_centrality(self.graph)

        stringify_temp = {}
        temp = nx.edge_load(self.graph)
        for key in temp.keys():
            stringify_temp[str(key)] = temp[key]
        result['edge_load'] = stringify_temp
        result['dispersion'] = nx.dispersion(self.graph)

        fname_centra = self.DIR + '/centrality.json'
        with open(fname_centra, "w") as f:
            json.dump(result, f, cls=SetEncoder, indent=2)
        print(fname_centra)
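The tuple-key conversion above appears three times; a reusable helper (hypothetical name, same behavior) would be:

def stringify_keys(d):
    # json.dump rejects tuple keys, so convert every key to its str() form
    return {str(k): v for k, v in d.items()}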
Example #5
def calculate(graph):
    return Utils.approx_to_int(nx.estrada_index(graph))

Example #6
    def compute_subgraph_center(self, subgraph):

        if self.method == 'betweenness_centrality':
            d = nx.betweenness_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'betweenness_centrality_subset':
            # sources and targets are required arguments; use all nodes here
            nodes = list(subgraph)
            d = nx.betweenness_centrality_subset(subgraph, nodes, nodes,
                                                 weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'information_centrality':
            d = nx.information_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'local_reaching_centrality':

            d = {}
            for n in self.G.nodes():
                d[n] = nx.local_reaching_centrality(self.G, n, weight='weight')

            center = max(d, key=d.get)

        elif self.method == 'voterank':
            # nx.voterank returns a list ranked by influence, not a dict
            d = nx.voterank(subgraph)
            center = d[0]

        elif self.method == 'percolation_centrality':
            d = nx.percolation_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'subgraph_centrality':
            d = nx.subgraph_centrality(subgraph)
            center = max(d, key=d.get)

        elif self.method == 'subgraph_centrality_exp':
            d = nx.subgraph_centrality_exp(subgraph)
            center = max(d, key=d.get)

        elif self.method == 'estrada_index':
            # nx.estrada_index returns a single scalar (the sum of subgraph
            # centralities), so rank nodes by subgraph centrality instead
            d = nx.subgraph_centrality(subgraph)
            center = max(d, key=d.get)

        elif self.method == 'second_order_centrality':
            # lower second order centrality means more central
            d = nx.second_order_centrality(subgraph)
            center = min(d, key=d.get)

        elif self.method == 'eigenvector_centrality':

            d = nx.eigenvector_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)
        elif self.method == 'load_centrality':

            d = nx.load_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'closeness_centrality':
            d = nx.closeness_centrality(subgraph)
            center = max(d, key=d.get)

        elif self.method == 'current_flow_closeness_centrality':
            d = nx.current_flow_closeness_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'current_flow_betweenness_centrality':
            d = nx.current_flow_betweenness_centrality(subgraph,
                                                       weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'current_flow_betweenness_centrality_subset':
            # sources and targets are required arguments; use all nodes here
            nodes = list(subgraph)
            d = nx.current_flow_betweenness_centrality_subset(
                subgraph, nodes, nodes, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'approximate_current_flow_betweenness_centrality':
            d = nx.approximate_current_flow_betweenness_centrality(
                subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'harmonic_centrality':
            d = nx.harmonic_centrality(subgraph)
            center = max(d, key=d.get)

        elif self.method == 'page_rank':

            d = nx.pagerank(subgraph, weight='weight')
            center = max(d, key=d.get)

        elif self.method == 'hits':

            # nx.hits returns (hubs, authorities); rank by authority score
            _, authorities = nx.hits(subgraph)
            center = max(authorities, key=authorities.get)

        elif self.method == 'katz_centrality':
            d = nx.katz_centrality(subgraph, weight='weight')
            center = max(d, key=d.get)

        else:
            new_centers = nx.center(subgraph)

            # nx.center returns a list of center nodes; picking one at random
            # is not stable across runs
            # TODO: choose the center deterministically
            center = random.choice(new_centers)
        return center
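The elif chain above could also be written as a dispatch table; a sketch for the measures that return a {node: score} dict (assumes the same networkx calls as the method):

CENTRALITY_FUNCS = {
    'betweenness_centrality': lambda g: nx.betweenness_centrality(g, weight='weight'),
    'closeness_centrality': nx.closeness_centrality,
    'harmonic_centrality': nx.harmonic_centrality,
    'load_centrality': lambda g: nx.load_centrality(g, weight='weight'),
    'subgraph_centrality': nx.subgraph_centrality,
}

def pick_center(subgraph, method):
    # Look up the measure, score every node, and return the best-scoring one.
    d = CENTRALITY_FUNCS[method](subgraph)
    return max(d, key=d.get)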
Example #7
def centrality(g):
    nx_g = pyzx2nx(g.copy())
    return nx.estrada_index(nx_g)
Example #8
# This script prints the properties of a given .dot file
import networkx as nx
import sys

filename = sys.argv[1]
curNet = nx.Graph(nx.nx_pydot.read_dot(filename))

bc=nx.betweenness_centrality(curNet)
spl=nx.average_shortest_path_length(curNet)
maxbc=bc[max(bc,key=bc.get)]
minbc=bc[min(bc,key=bc.get)]
meanbc=sum(bc.values())/len(bc)
estrada_i=nx.estrada_index(curNet)
print( str(spl) + ',' + str(maxbc) + ',' + str(minbc)  + ','+ str(meanbc) + ',' + str(estrada_i))
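A hypothetical invocation of the script above (the script and .dot file names are placeholders):

#   python dot_props.py network.dot
#   prints: avg_shortest_path_length,max_bc,min_bc,mean_bc,estrada_index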

Example #9
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False):
    import scipy.io as sio
    import numpy as np
    import networkx as nx
    import pandas as pd
    import os
    Data = sio.loadmat(Mat_D)
    matX = Data['Correlation']  #[:tamn,:tamn]
    labels = Data['labels']
    print(np.shape(matX))
    print(np.shape(labels))
    print(np.min(matX), np.max(matX))

    if percentageConnections:
        if 0 < percentageConnections < 1:
            for i in range(-100, 100):
                per = np.sum(matX > i / 100.) / np.size(matX)
                if per <= Threshold:
                    Threshold = i / 100.
                    break
            print(Threshold)
        else:
            print('The percentage is out of range')

    # Edge list for the graph
    row, col = np.shape(matX)
    e = []
    for i in range(1, row):
        for j in range(i):
            if complet:
                e.append((labels[i], labels[j], matX[i, j]))
            else:
                if matX[i, j] > Threshold:
                    e.append((labels[i], labels[j], matX[i, j]))

    print(np.shape(e)[0], int(((row - 1) * row) / 2))

    # Build the graph
    G = nx.Graph()
    G.add_weighted_edges_from(e)
    labelNew = list(G.nodes)

    # Graph-level metrics (weighted)
    Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight')
    cluster = nx.average_clustering(G, weight='weight')

    # Unweighted
    estra = nx.estrada_index(G)
    tnsity = nx.transitivity(G)
    conNo = nx.average_node_connectivity(G)
    ac = nx.degree_assortativity_coefficient(G)

    # Per-node metrics
    tam = 15
    BoolCenV = False
    BoolLoad = False
    alpha = 0.1
    beta = 1.0

    katxCN = nx.katz_centrality_numpy(G,
                                      alpha=alpha,
                                      beta=beta,
                                      weight='weight')
    bcen = nx.betweenness_centrality(G, weight='weight')
    av_nd = nx.average_neighbor_degree(G, weight='weight')
    ctr = nx.clustering(G, weight='weight')
    ranPaN = nx.pagerank_numpy(G, weight='weight')
    Gol_N = nx.hits_numpy(G)
    Dgc = nx.degree_centrality(G)
    cl_ce = nx.closeness_centrality(G)
    cluster_Sq = nx.square_clustering(G)
    centr = nx.core_number(G)
    cami = nx.node_clique_number(G)
    camiN = nx.number_of_cliques(G)
    trian = nx.triangles(G)
    colorG = nx.greedy_color(G)
    try:
        cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight')
        tam = tam + 1
        BoolCenV = True
    except TypeError:
        print("The network is too small to compute this parameter")
    except nx.NetworkXException:
        print('NetworkXPointlessConcept: null graph')
    if Threshold > 0:
        carga_cen = nx.load_centrality(G, weight='weight')  # Positive weights
        BoolLoad = True
        tam = tam + 1
    #katxC=nx.katz_centrality(G, alpha=alpha, beta=beta, weight='weight')
    #cenV=nx.eigenvector_centrality(G,weight='weight')
    #cenV=nx.eigenvector_centrality(G,weight='weight')
    #Golp=nx.hits(G)
    #Gol_si=nx.hits_scipy(G)
    #ranPa=nx.pagerank(G, weight='weight')
    #ranPaS=nx.pagerank_scipy(G, weight='weight')

    matrix_datos = np.zeros((tam, np.shape(labelNew)[0]))
    print(np.shape(matrix_datos))
    lim = np.shape(labelNew)[0]
    for i in range(lim):
        roi = labelNew[i]
        #print(roi)
        matrix_datos[0, i] = katxCN[roi]
        matrix_datos[1, i] = bcen[roi]
        matrix_datos[2, i] = av_nd[roi]
        matrix_datos[3, i] = ctr[roi]
        matrix_datos[4, i] = ranPaN[roi]
        matrix_datos[5, i] = Gol_N[0][roi]
        matrix_datos[6, i] = Gol_N[1][roi]
        matrix_datos[7, i] = Dgc[roi]
        matrix_datos[8, i] = cl_ce[roi]
        matrix_datos[9, i] = cluster_Sq[roi]
        matrix_datos[10, i] = centr[roi]
        matrix_datos[11, i] = cami[roi]
        matrix_datos[12, i] = camiN[roi]
        matrix_datos[13, i] = trian[roi]
        matrix_datos[14, i] = colorG[roi]
        if BoolCenV:
            matrix_datos[15, i] = cenVNum[roi]
        if BoolLoad:
            # row 16 assumes eigenvector centrality was also computed
            matrix_datos[16, i] = carga_cen[roi]
        #matrix_datos[0,i]=katxC[roi]
        #matrix_datos[2,i]=cenV[roi]
        #matrix_datos[7,i]=Golp[0][roi]
        #matrix_datos[9,i]=Gol_si[0][roi]
        #matrix_datos[10,i]=Golp[1][roi]
        #matrix_datos[12,i]=Gol_si[1][roi]
        #matrix_datos[22,i]=ranPa[roi]
        #matrix_datos[24,i]=ranPaS[roi]
    FuncName = [
        'degree_pearson_correlation_coefficient', 'average_clustering',
        'estrada_index', 'transitivity', 'average_node_connectivity',
        'degree_assortativity_coefficient', 'katz_centrality_numpy',
        'betweenness_centrality', 'average_neighbor_degree', 'clustering',
        'pagerank_numpy', 'hits_numpy0', 'hits_numpy1', 'degree_centrality',
        'closeness_centrality', 'square_clustering', 'core_number',
        'node_clique_number', 'number_of_cliques', 'triangles', 'greedy_color',
        'eigenvector_centrality_numpy', 'load_centrality'
    ]
    frame = pd.DataFrame(matrix_datos)
    frame.columns = labelNew
    frame.index = FuncName[6:6 + tam]

    Resul = os.getcwd()
    out_data = Resul + '/graph_metrics.csv'
    out_mat = Resul + '/graph_metrics_global.mat'

    frame.to_csv(out_data)
    sio.savemat(
        out_mat, {
            FuncName[0]: Dpc,
            FuncName[1]: cluster,
            FuncName[2]: estra,
            FuncName[3]: tnsity,
            FuncName[4]: conNo,
            FuncName[5]: ac
        })
    return out_data, out_mat
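A usage sketch for get_graph (the file name and threshold are placeholders; the .mat file must contain the 'Correlation' matrix and 'labels' variables the function expects):

out_data, out_mat = get_graph('Correlation.mat', Threshold=0.5)
print(out_data, out_mat)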
Example #10
def estgexf():
    global fpname
    indegree_list = []
    outdegree_list = []
    closeness_list = []
    betweenness_list = []
    users_list = []
    clustering_list = []
    file_path = fpname
    output_est = open(file_path + "_stats.txt", "w")
    print("node\tindegree\toutdegree\tcloseness\tbetweenness\tclustering\tcomponents\tdiameter",
          file=output_est)
    indegree = nx.in_degree_centrality(G)
    outdegree = nx.out_degree_centrality(G)
    closeness = nx.closeness_centrality(G)
    betweenness = nx.betweenness_centrality(G)
    ndG = G.to_undirected()
    clustering = nx.clustering(ndG)
    try:
        diameter = nx.diameter(G)
    except nx.NetworkXError:
        diameter = 1000  # sentinel value for a disconnected network
    for key, value in clustering.items():
        clustering_list.append(value)
    clusv = sum(clustering_list) / float(len(clustering_list))
    for key, value in indegree.items():
        indegree_list.append(value)
    indv = sum(indegree_list) / float(len(indegree_list))
    for key, value in outdegree.items():
        outdegree_list.append(value)
    outv = sum(outdegree_list) / float(len(outdegree_list))
    for key, value in closeness.items():
        closeness_list.append(value)
    closev = sum(closeness_list) / float(len(closeness_list))
    for key, value in betweenness.items():
        betweenness_list.append(value)
        users_list.append(key)
    betv = sum(betweenness_list) / float(len(betweenness_list))
    # number of connected components of the undirected graph
    components_count = nx.number_connected_components(ndG)
    for pos, user in enumerate(users_list):
        print(user, "\t", indegree_list[pos], "\t", outdegree_list[pos], "\t",
              closeness_list[pos], "\t", betweenness_list[pos], "\t",
              clustering_list[pos], "\t", components_count, "\t", diameter,
              file=output_est)

    output_est.close()
    ascoef = nx.degree_assortativity_coefficient(G)
    estr = nx.estrada_index(ndG)

    print("Network indicators for", fpname)
    print()
    print("---------------------------------------")
    if diameter == 1000:
        print("Diameter: disconnected network")
    else:
        print("Diameter:", diameter)
    print("Average clustering:", clusv)
    print("Average Indegree centrality:", indv)
    print("Average Outdegree centrality:", outv)
    print("Average Closeness centrality:", closev)
    print("Average Betweenness centrality:", betv)
    print("Components count:", components_count)
    print("Assortativity coefficient:", ascoef)
    print("Estrada index:", estr)
    print("---------------------------------------")
    print()
    return False
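The per-metric averaging loops in estgexf could be collapsed; a sketch of the same computation with statistics.mean (these lines would replace the loops inside the function, not extend them):

from statistics import mean

clusv = mean(clustering.values())
indv = mean(indegree.values())
outv = mean(outdegree.values())
closev = mean(closeness.values())
betv = mean(betweenness.values())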
Example #11
                        prev_connected_comp = []
                        for cx in pres_connected_component:
                            prev_connected_comp.append(cx)

                        trajectory_to_node_map_prev = trajectory_to_node_map_pres
                        trajectory_to_node_map_pres = {}

                    for node in delete_nodes:
                        if G.has_node(node):
                            G.remove_node(node)

                ind_list = [x + 1 for x in ind_list]

                for j in range(len(traj_list)):
                    if (ind_list[j] >= len(
                            I.trajectories[traj_list[j]].points)):
                        del_ind_tra.append(j)

                traj_list = [
                    i for j, i in enumerate(traj_list) if j not in del_ind_tra
                ]
                ind_list = [
                    i for j, i in enumerate(ind_list) if j not in del_ind_tra
                ]
    print(trkname, len(streamlines), len(R.nodes()), len(R.edges()),
          nx.average_clustering(R), nx.estrada_index(R))
Example #12
def large_graph_metric(G, weight=None, thresh=None):

    # threshold graph edges based on weight
    if thresh is None:
        H = G
    else:
        if weight is None:
            raise ValueError('No weight is given for thresholding')

        rm_edges = []
        for u, v, data in G.edges(data=True):
            if data['weight'] < thresh:
                rm_edges.append((u, v))
        H = G.copy()
        H.remove_edges_from(rm_edges)

    # create feature vector
    x = np.ones(13)

    # clustering coefficient
    x[0] = nx.average_clustering(H)

    # node connectivity
    x[1] = nx.node_connectivity(H)

    # average betweenness centrality
    # bc = nx.betweenness_centrality(G, weight=weight)
    bc = nx.betweenness_centrality(G)
    x[2] = float(sum(bc[node] for node in bc)) / len(bc)

    # average closeness centrality
    # cc = nx.closeness_centrality(G, weight=weight)
    cc = nx.closeness_centrality(G)
    x[3] = float(sum(cc[node] for node in cc)) / len(cc)

    # estrada index
    x[4] = nx.estrada_index(H)

    # average degree
    x[5] = float(sum(len(H[u]) for u in H)) / len(H)

    # standard deviation degree
    # x[6] = float(sum((x[4] - len(H[u]))**2)) / len(H)

    # degree assortativity coefficient
    # x[7] = nx.degree_assortativity_coefficient(G, weight=weight)
    x[7] = nx.degree_assortativity_coefficient(G)

    # transitivity
    x[8] = nx.transitivity(H)

    # diameter
    x[9] = nx.diameter(H)

    # eccentricity
    # x[10] = nx.eccentricity(H)

    # periphery
    # x[11] = nx.periphery(H)

    # radius
    x[12] = nx.radius(H)

    return x
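A usage sketch (les_miserables_graph is connected and carries 'weight' edge attributes; no threshold is passed, so H stays connected for the diameter and radius calls; assumes numpy as np is imported for the function body):

import networkx as nx
import numpy as np

G = nx.les_miserables_graph()
features = large_graph_metric(G, weight='weight')
print(features)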
Example #13
count = 1
l = []
for ind, row in df_cd.iterrows():

    fcode = row['STATECD']
    st = row['STATEFP']
    #print(fcode[0:2]+' ' + fcode[2:]+ ' || ' + str(count) + ' of ' + str(436))
    count += 1
    # open matrix
    if st == state or state == 'all':

        adj = np.loadtxt('adj_mats/' + fcode[0:2] + '_' + fcode + '_dist.txt',
                         delimiter=',').astype(int)
        G = nx.from_numpy_array(adj)  # nx.from_numpy_matrix in networkx < 3.0
        spec = nx.estrada_index(G) / len(G.nodes)  # Estrada index per node
        '''  
        deg = np.diag(adj.sum(axis=0)).astype(int)
        lap = deg-adj
        #print(lap.shape)
        lap = lap.astype(int)    
        m=lap
        
        spec = np.real(scipy.linalg.eigvals(m).tolist())
        #print(np.sum(adj))
        spec.sort()
        #print(spec[0:6])
        
        
        
        spec = [np.log(x) for x in spec if x>0]