def compute_features(self):

    wiener_index = lambda graph: nx.wiener_index(graph)
    self.add_feature(
        "wiener index",
        wiener_index,
        "The Wiener index is defined as the sum of the lengths of the shortest paths between all pairs of vertices",
        InterpretabilityScore(4),
    )

    estrada_index = lambda graph: nx.estrada_index(graph)
    self.add_feature(
        "estrada_index",
        estrada_index,
        "The Estrada index is a topological index of protein folding or 3D compactness",
        InterpretabilityScore(4),
    )
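# A minimal, standalone sketch (independent of the feature class above and its
# add_feature/InterpretabilityScore helpers) showing what the two registered
# callables compute; the small graph is illustrative.
import networkx as nx

g = nx.path_graph(4)           # the path 0-1-2-3
print(nx.wiener_index(g))      # sum of all-pairs shortest-path lengths: 10.0
print(nx.estrada_index(g))     # sum of exp(lambda_i) over adjacency-matrix eigenvalues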
def do_something(root, tree, total_graph, parent, health_values, file_diffs,
                 commit_count, actor_count, closeness_values,
                 edge_tolerance=0.5, init_time_val=5, deprecation_val=1):
    update_actors(tree, total_graph, root)

    # Parse the ISO-8601 commit timestamp into a Unix epoch float
    date = tree.nodes[root]['date']
    date = time.mktime(time.strptime(date, "%Y-%m-%dT%H:%M:%SZ"))

    if parent:
        update_edge(total_graph, root, parent, tree, file_diffs,
                    edge_tolerance=edge_tolerance,
                    init_time_val=init_time_val,
                    deprecation_val=deprecation_val)

    # A node's importance is the sum of the weights of its incident edges
    for node in total_graph.nodes():
        importance = 0.
        for edge in total_graph[node]:
            importance += total_graph[node][edge]['weight']
        total_graph.nodes[node]['importance'][date] = importance

    health_values.append((date, nx.estrada_index(total_graph)))
    closeness_values.append(
        (date, np.mean(list(nx.closeness_centrality(total_graph).values()))))
    actor_count.append((date, len(total_graph)))

    if len(commit_count) > 0:
        commit_count.append((date, commit_count[-1][1] + 1))
    else:
        commit_count.append((date, 1))
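# A minimal, standalone sketch of the timestamp handling assumed above: each
# node carries an ISO-8601 UTC 'date' string, parsed with time.strptime and
# converted (in local time, as in the function above) to a Unix epoch float by
# time.mktime. The sample value is hypothetical.
import time

stamp = "2020-01-31T12:00:00Z"
epoch = time.mktime(time.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ"))
print(epoch)  # float, used above as the key for the per-date importance values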
def compute_summaries(G):
    """ Compute network features, computational times and their nature.

    Evaluate 54 summary statistics of a network G, plus 4 noise variables,
    store the computational time to evaluate each summary statistic, and keep
    track of their nature (discrete or not).

    Args:
        G (networkx.classes.graph.Graph): an undirected networkx graph.

    Returns:
        resDicts (tuple): a tuple containing the elements:
            - dictSums (dict): a dictionary with the name of the summaries as
              keys and the summary statistic values as values;
            - dictTimes (dict): a dictionary with the name of the summaries as
              keys and the time to compute each one as values;
            - dictIsDisc (dict): a dictionary indicating if the summary is
              discrete (True) or not (False).
    """
    dictSums = dict()    # Will store the summary statistic values
    dictTimes = dict()   # Will store the evaluation times
    dictIsDisc = dict()  # Will store the summary statistic nature

    # Extract the largest connected component
    Gcc = sorted(nx.connected_components(G), key=len, reverse=True)
    G_lcc = G.subgraph(Gcc[0])

    # Number of edges
    start = time.time()
    dictSums["num_edges"] = G.number_of_edges()
    dictTimes["num_edges"] = time.time() - start
    dictIsDisc["num_edges"] = True

    # Number of connected components
    start = time.time()
    dictSums["num_of_CC"] = nx.number_connected_components(G)
    dictTimes["num_of_CC"] = time.time() - start
    dictIsDisc["num_of_CC"] = True

    # Number of nodes in the largest connected component
    start = time.time()
    dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc)
    dictTimes["num_nodes_LCC"] = time.time() - start
    dictIsDisc["num_nodes_LCC"] = True

    # Number of edges in the largest connected component
    start = time.time()
    dictSums["num_edges_LCC"] = G_lcc.number_of_edges()
    dictTimes["num_edges_LCC"] = time.time() - start
    dictIsDisc["num_edges_LCC"] = True

    # Diameter of the largest connected component
    start = time.time()
    dictSums["diameter_LCC"] = nx.diameter(G_lcc)
    dictTimes["diameter_LCC"] = time.time() - start
    dictIsDisc["diameter_LCC"] = True

    # Average geodesic distance (shortest path length in the LCC)
    start = time.time()
    dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc)
    dictTimes["avg_geodesic_dist_LCC"] = time.time() - start
    dictIsDisc["avg_geodesic_dist_LCC"] = False

    # Average degree of the neighborhood of each node
    start = time.time()
    dictSums["avg_deg_connectivity"] = np.mean(
        list(nx.average_degree_connectivity(G).values()))
    dictTimes["avg_deg_connectivity"] = time.time() - start
    dictIsDisc["avg_deg_connectivity"] = False

    # Average degree of the neighbors of each node in the LCC
    start = time.time()
    dictSums["avg_deg_connectivity_LCC"] = np.mean(
        list(nx.average_degree_connectivity(G_lcc).values()))
    dictTimes["avg_deg_connectivity_LCC"] = time.time() - start
    dictIsDisc["avg_deg_connectivity_LCC"] = False

    # Recover the degree distribution
    start_degree_extract = time.time()
    degree_vals = list(dict(G.degree()).values())
    degree_extract_time = time.time() - start_degree_extract

    # Entropy of the degree distribution
    start = time.time()
    dictSums["degree_entropy"] = ss.entropy(degree_vals)
    dictTimes["degree_entropy"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_entropy"] = False

    # Maximum degree
    start = time.time()
    dictSums["degree_max"] = max(degree_vals)
    dictTimes["degree_max"] = time.time() - start + degree_extract_time
    dictIsDisc["degree_max"] = True

    # Average degree
    start = time.time()
    dictSums["degree_mean"] = np.mean(degree_vals)
    dictTimes["degree_mean"] = time.time() - start + degree_extract_time
dictIsDisc["degree_mean"] = False # Median degree start = time.time() dictSums["degree_median"] = np.median(degree_vals) dictTimes["degree_median"] = time.time() - start + degree_extract_time dictIsDisc["degree_median"] = False # Standard deviation of the degree distribution start = time.time() dictSums["degree_std"] = np.std(degree_vals) dictTimes["degree_std"] = time.time() - start + degree_extract_time dictIsDisc["degree_std"] = False # Quantile 25% start = time.time() dictSums["degree_q025"] = np.quantile(degree_vals, 0.25) dictTimes["degree_q025"] = time.time() - start + degree_extract_time dictIsDisc["degree_q025"] = False # Quantile 75% start = time.time() dictSums["degree_q075"] = np.quantile(degree_vals, 0.75) dictTimes["degree_q075"] = time.time() - start + degree_extract_time dictIsDisc["degree_q075"] = False # Average geodesic distance start = time.time() dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length( G_lcc) dictTimes["avg_shortest_path_length_LCC"] = time.time() - start dictIsDisc["avg_shortest_path_length_LCC"] = False # Average global efficiency: # The efficiency of a pair of nodes in a graph is the multiplicative # inverse of the shortest path distance between the nodes. # The average global efficiency of a graph is the average efficiency of # all pairs of nodes. start = time.time() dictSums["avg_global_efficiency"] = nx.global_efficiency(G) dictTimes["avg_global_efficiency"] = time.time() - start dictIsDisc["avg_global_efficiency"] = False # Harmonic mean which is 1/avg_global_efficiency start = time.time() dictSums["harmonic_mean"] = nx.global_efficiency(G) dictTimes["harmonic_mean"] = time.time() - start dictIsDisc["harmonic_mean"] = False # Average local efficiency # The local efficiency of a node in the graph is the average global # efficiency of the subgraph induced by the neighbors of the node. # The average local efficiency is the average of the # local efficiencies of each node. start = time.time() dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc) dictTimes["avg_local_efficiency_LCC"] = time.time() - start dictIsDisc["avg_local_efficiency_LCC"] = False # Node connectivity # The node connectivity is equal to the minimum number of nodes that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. start = time.time() dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc) dictTimes["node_connectivity_LCC"] = time.time() - start dictIsDisc["node_connectivity_LCC"] = True # Edge connectivity # The edge connectivity is equal to the minimum number of edges that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. 
    start = time.time()
    dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc)
    dictTimes["edge_connectivity_LCC"] = time.time() - start
    dictIsDisc["edge_connectivity_LCC"] = True

    # Graph transitivity
    # 3 times the number of triangles divided by the number of triads
    start = time.time()
    dictSums["transitivity"] = nx.transitivity(G)
    dictTimes["transitivity"] = time.time() - start
    dictIsDisc["transitivity"] = False

    # Number of triangles
    start = time.time()
    dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3
    dictTimes["num_triangles"] = time.time() - start
    dictIsDisc["num_triangles"] = True

    # Estimate of the average clustering coefficient of G:
    # Average local clustering coefficient, with the local clustering
    # coefficient defined as
    # C_i = (number of pairs of neighbors of i that are connected)
    #       / (number of pairs of neighbors of i)
    start = time.time()
    dictSums["avg_clustering_coef"] = nx.average_clustering(G)
    dictTimes["avg_clustering_coef"] = time.time() - start
    dictIsDisc["avg_clustering_coef"] = False

    # Square clustering (averaged over nodes):
    # the fraction of possible squares that exist at the node.
    # Mean over nodes
    start = time.time()
    dictSums["square_clustering_mean"] = np.mean(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_mean"] = time.time() - start
    dictIsDisc["square_clustering_mean"] = False

    # Median over nodes
    start = time.time()
    dictSums["square_clustering_median"] = np.median(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_median"] = time.time() - start
    dictIsDisc["square_clustering_median"] = False

    # Standard deviation over nodes
    start = time.time()
    dictSums["square_clustering_std"] = np.std(
        list(nx.square_clustering(G).values()))
    dictTimes["square_clustering_std"] = time.time() - start
    dictIsDisc["square_clustering_std"] = False

    # Number of nodes in the 2-core
    start = time.time()
    dictSums["num_2cores"] = len(nx.k_core(G, k=2))
    dictTimes["num_2cores"] = time.time() - start
    dictIsDisc["num_2cores"] = True

    # Number of nodes in the 3-core
    start = time.time()
    dictSums["num_3cores"] = len(nx.k_core(G, k=3))
    dictTimes["num_3cores"] = time.time() - start
    dictIsDisc["num_3cores"] = True

    # Number of nodes in the 4-core
    start = time.time()
    dictSums["num_4cores"] = len(nx.k_core(G, k=4))
    dictTimes["num_4cores"] = time.time() - start
    dictIsDisc["num_4cores"] = True

    # Number of nodes in the 5-core
    start = time.time()
    dictSums["num_5cores"] = len(nx.k_core(G, k=5))
    dictTimes["num_5cores"] = time.time() - start
    dictIsDisc["num_5cores"] = True

    # Number of nodes in the 6-core
    start = time.time()
    dictSums["num_6cores"] = len(nx.k_core(G, k=6))
    dictTimes["num_6cores"] = time.time() - start
    dictIsDisc["num_6cores"] = True

    # Number of nodes in each k-shell
    # The k-shell is the subgraph induced by nodes with core number k.
    # That is, nodes in the k-core that are not in the (k+1)-core.

    # Number of nodes in the 2-shell
    start = time.time()
    dictSums["num_2shells"] = len(nx.k_shell(G, 2))
    dictTimes["num_2shells"] = time.time() - start
    dictIsDisc["num_2shells"] = True

    # Number of nodes in the 3-shell
    start = time.time()
    dictSums["num_3shells"] = len(nx.k_shell(G, 3))
    dictTimes["num_3shells"] = time.time() - start
    dictIsDisc["num_3shells"] = True

    # Number of nodes in the 4-shell
    start = time.time()
    dictSums["num_4shells"] = len(nx.k_shell(G, 4))
    dictTimes["num_4shells"] = time.time() - start
    dictIsDisc["num_4shells"] = True

    # Number of nodes in the 5-shell
    start = time.time()
    dictSums["num_5shells"] = len(nx.k_shell(G, 5))
    dictTimes["num_5shells"] = time.time() - start
    dictIsDisc["num_5shells"] = True

    # Number of nodes in the 6-shell
    start = time.time()
    dictSums["num_6shells"] = len(nx.k_shell(G, 6))
    dictTimes["num_6shells"] = time.time() - start
    dictIsDisc["num_6shells"] = True

    # Enumerate all cliques once and reuse the list for the clique counts below
    start = time.time()
    listOfCliques = list(nx.enumerate_all_cliques(G))
    enum_all_cliques_time = time.time() - start

    # Number of 4-cliques
    start = time.time()
    n4Clique = 0
    for li in listOfCliques:
        if len(li) == 4:
            n4Clique += 1
    dictSums["num_4cliques"] = n4Clique
    dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_4cliques"] = True

    # Number of 5-cliques
    start = time.time()
    n5Clique = 0
    for li in listOfCliques:
        if len(li) == 5:
            n5Clique += 1
    dictSums["num_5cliques"] = n5Clique
    dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time
    dictIsDisc["num_5cliques"] = True

    # Maximal size of a clique in the graph (approximation)
    start = time.time()
    dictSums["max_clique_size"] = len(approximation.clique.max_clique(G))
    dictTimes["max_clique_size"] = time.time() - start
    dictIsDisc["max_clique_size"] = True

    # Approximated size of a large clique in the graph
    start = time.time()
    dictSums["large_clique_size"] = approximation.large_clique_size(G)
    dictTimes["large_clique_size"] = time.time() - start
    dictIsDisc["large_clique_size"] = True

    # Number of shortest paths of length k
    start = time.time()
    listOfPLength = list(nx.shortest_path_length(G))
    path_length_time = time.time() - start

    # k = 3 (each unordered pair is counted twice, hence the division by 2)
    start = time.time()
    n3Paths = 0
    for _, lengths in listOfPLength:
        n3Paths += list(lengths.values()).count(3)
    dictSums["num_shortest_3paths"] = n3Paths / 2
    dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_3paths"] = True

    # k = 4
    start = time.time()
    n4Paths = 0
    for _, lengths in listOfPLength:
        n4Paths += list(lengths.values()).count(4)
    dictSums["num_shortest_4paths"] = n4Paths / 2
    dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_4paths"] = True

    # k = 5
    start = time.time()
    n5Paths = 0
    for _, lengths in listOfPLength:
        n5Paths += list(lengths.values()).count(5)
    dictSums["num_shortest_5paths"] = n5Paths / 2
    dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_5paths"] = True

    # k = 6
    start = time.time()
    n6Paths = 0
    for _, lengths in listOfPLength:
        n6Paths += list(lengths.values()).count(6)
    dictSums["num_shortest_6paths"] = n6Paths / 2
    dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time
    dictIsDisc["num_shortest_6paths"] = True

    # Size of the minimum (weight) node dominating set:
    # A subset of nodes where each node not in the subset has a direct
    # neighbor in the dominating set.
    start = time.time()
    T = approximation.min_weighted_dominating_set(G)
    dictSums["size_min_node_dom_set"] = len(T)
    dictTimes["size_min_node_dom_set"] = time.time() - start
    dictIsDisc["size_min_node_dom_set"] = True

    # Same, but with the edge dominating set
    start = time.time()
    T = approximation.min_edge_dominating_set(G)
    dictSums["size_min_edge_dom_set"] = 2 * len(T)  # times 2 to count nodes
    dictTimes["size_min_edge_dom_set"] = time.time() - start
    dictIsDisc["size_min_edge_dom_set"] = True

    # The Wiener index of a graph is the sum of the shortest-path distances
    # between each pair of reachable nodes. For pairs of nodes in undirected
    # graphs, only one orientation of the pair is counted.
    # (Computed on the LCC, otherwise it is infinite.)
    start = time.time()
    dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
    dictTimes["wiener_index_LCC"] = time.time() - start
    dictIsDisc["wiener_index_LCC"] = True

    # Betweenness node centrality (averaged over nodes):
    # at node u it is defined as B_u = sum_{i,j} sigma(i,u,j)/sigma(i,j),
    # where sigma(i,j) is the number of shortest paths between i and j and
    # sigma(i,u,j) the number of those passing through u.
    start = time.time()
    betweenness = list(nx.betweenness_centrality(G).values())
    time_betweenness = time.time() - start

    # Mean over nodes
    start = time.time()
    dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
    dictTimes["betweenness_centrality_mean"] = time.time() - start + time_betweenness
    dictIsDisc["betweenness_centrality_mean"] = False

    # Maximum over nodes
    start = time.time()
    dictSums["betweenness_centrality_max"] = max(betweenness)
    dictTimes["betweenness_centrality_max"] = time.time() - start + time_betweenness
    dictIsDisc["betweenness_centrality_max"] = False

    # Central point dominance
    # CPD = sum_u (B_max - B_u) / (N - 1)
    start = time.time()
    dictSums["central_point_dominance"] = sum(
        max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
    dictTimes["central_point_dominance"] = time.time() - start + time_betweenness
    dictIsDisc["central_point_dominance"] = False

    # Estrada index: sum_i^n exp(lambda_i),
    # with n the number of nodes and lambda_i the i-th eigenvalue of the
    # adjacency matrix of G.
    start = time.time()
    dictSums["Estrada_index"] = nx.estrada_index(G)
    dictTimes["Estrada_index"] = time.time() - start
    dictIsDisc["Estrada_index"] = False

    # Eigenvector centrality
    # The centrality of node i is the i-th coordinate of the eigenvector x
    # associated with the largest eigenvalue of the adjacency matrix A
    # (Ax = lambda*x), so each node's score is proportional to the sum of
    # its neighbors' scores.
    # Mean over nodes
    start = time.time()
    dictSums["avg_eigenvec_centrality"] = np.mean(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["avg_eigenvec_centrality"] = time.time() - start
    dictIsDisc["avg_eigenvec_centrality"] = False

    # Maximum over nodes
    start = time.time()
    dictSums["max_eigenvec_centrality"] = max(
        list(nx.eigenvector_centrality_numpy(G).values()))
    dictTimes["max_eigenvec_centrality"] = time.time() - start
    dictIsDisc["max_eigenvec_centrality"] = False

    ### Noise generation ###

    # Noise simulated from a Normal(0, 1) distribution
    start = time.time()
    dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
    dictTimes["noise_Gauss"] = time.time() - start
    dictIsDisc["noise_Gauss"] = False

    # Noise simulated from a Uniform distribution on [0, 50]
    start = time.time()
    dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
    dictTimes["noise_Unif"] = time.time() - start
    dictIsDisc["noise_Unif"] = False

    # Noise simulated from a Bernoulli(0.5) distribution
    start = time.time()
    dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
    dictTimes["noise_Bern"] = time.time() - start
dictIsDisc["noise_Bern"] = True # Noise simulated from a discrete uniform distribution [0,50[ start = time.time() dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50) dictTimes["noise_disc_Unif"] = time.time() - start dictIsDisc["noise_disc_Unif"] = True resDicts = (dictSums, dictTimes, dictIsDisc) return resDicts
def centrality(self):
    result = {}
    result['degree_centrality'] = nx.degree_centrality(self.graph)

    if self.directed == 'directed':
        result['in_degree_centrality'] = nx.in_degree_centrality(self.graph)
        result['out_degree_centrality'] = nx.out_degree_centrality(self.graph)

    result['closeness_centrality'] = nx.closeness_centrality(self.graph)
    result['betweenness_centrality'] = nx.betweenness_centrality(self.graph)

    # Stringify the edge keys (tuples cannot be encoded as JSON keys)
    stringify_temp = {}
    temp = nx.edge_betweenness_centrality(self.graph)
    for key in temp.keys():
        stringify_temp[str(key)] = temp[key]
    result['edge_betweenness_centrality'] = stringify_temp

    if self.directed == 'undirected':
        result['current_flow_closeness_centrality'] = \
            nx.current_flow_closeness_centrality(self.graph)
        result['current_flow_betweenness_centrality'] = \
            nx.current_flow_betweenness_centrality(self.graph)

        stringify_temp = {}
        temp = nx.edge_current_flow_betweenness_centrality(self.graph)
        for key in temp.keys():
            stringify_temp[str(key)] = temp[key]
        result['edge_current_flow_betweenness_centrality'] = stringify_temp

        result['approximate_current_flow_betweenness_centrality'] = \
            nx.approximate_current_flow_betweenness_centrality(self.graph)
        result['eigenvector_centrality'] = nx.eigenvector_centrality(self.graph)
        result['eigenvector_centrality_numpy'] = \
            nx.eigenvector_centrality_numpy(self.graph)
        result['katz_centrality'] = nx.katz_centrality(self.graph)
        result['katz_centrality_numpy'] = nx.katz_centrality_numpy(self.graph)
        result['communicability'] = nx.communicability(self.graph)
        result['communicability_exp'] = nx.communicability_exp(self.graph)
        # Note: in NetworkX >= 2.0 the next two functions are named
        # subgraph_centrality / subgraph_centrality_exp, and edge_load (below)
        # is named edge_load_centrality.
        result['communicability_centrality'] = \
            nx.communicability_centrality(self.graph)
        result['communicability_centrality_exp'] = \
            nx.communicability_centrality_exp(self.graph)
        result['communicability_betweenness_centrality'] = \
            nx.communicability_betweenness_centrality(self.graph)
        result['estrada_index'] = nx.estrada_index(self.graph)

    result['load_centrality'] = nx.load_centrality(self.graph)

    stringify_temp = {}
    temp = nx.edge_load(self.graph)
    for key in temp.keys():
        stringify_temp[str(key)] = temp[key]
    result['edge_load'] = stringify_temp

    result['dispersion'] = nx.dispersion(self.graph)

    fname_centra = self.DIR + '/centrality.json'
    with open(fname_centra, "w") as f:
        json.dump(result, f, cls=SetEncoder, indent=2)
    print(fname_centra)
def calculate(graph):
    return Utils.approx_to_int(nx.estrada_index(graph))
def compute_subgraph_center(self, subgraph):

    if self.method == 'betweenness_centrality':
        d = nx.betweenness_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'betweenness_centrality_subset':
        d = nx.betweenness_centrality_subset(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'information_centrality':
        d = nx.information_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'local_reaching_centrality':
        d = {}
        for n in self.G.nodes():
            d[n] = nx.local_reaching_centrality(self.G, n, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'voterank':
        # voterank returns a list of nodes ordered by decreasing influence
        d = nx.voterank(subgraph)
        center = d[0]

    elif self.method == 'percolation_centrality':
        d = nx.percolation_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'subgraph_centrality':
        d = nx.subgraph_centrality(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'subgraph_centrality_exp':
        d = nx.subgraph_centrality_exp(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'estrada_index':
        # estrada_index returns a single graph-level scalar, so use the
        # per-node subgraph centralities (which sum to the Estrada index)
        # to pick a center
        d = nx.subgraph_centrality(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'second_order_centrality':
        d = nx.second_order_centrality(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'eigenvector_centrality':
        d = nx.eigenvector_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'load_centrality':
        d = nx.load_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'closeness_centrality':
        d = nx.closeness_centrality(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'current_flow_closeness_centrality':
        d = nx.current_flow_closeness_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'current_flow_betweenness_centrality':
        d = nx.current_flow_betweenness_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'current_flow_betweenness_centrality_subset':
        d = nx.current_flow_betweenness_centrality_subset(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'approximate_current_flow_betweenness_centrality':
        d = nx.approximate_current_flow_betweenness_centrality(
            subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'harmonic_centrality':
        d = nx.harmonic_centrality(subgraph)
        center = max(d, key=d.get)

    elif self.method == 'page_rank':
        d = nx.pagerank(subgraph, weight='weight')
        center = max(d, key=d.get)

    elif self.method == 'hits':
        # hits returns a (hubs, authorities) pair; the authority scores are used here
        d = nx.hits(subgraph)[1]
        center = max(d, key=d.get)

    elif self.method == 'katz_centrality':
        d = nx.katz_centrality(subgraph, weight='weight')
        center = max(d, key=d.get)

    else:
        new_centers = nx.center(subgraph)
        # new_centers gives a list of centers and here we just pick one randomly
        # -- not good for stability
        # TODO: find a better way to choose the center -- make it stable
        index = random.randint(0, len(new_centers) - 1)
        center = new_centers[index]

    return center
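# A minimal, standalone sketch of the argmax pattern used throughout the
# branches above: compute a per-node centrality dict, then take the node with
# the largest value as the subgraph center. The example graph is illustrative.
import networkx as nx

sg = nx.karate_club_graph()            # stand-in for `subgraph`
scores = nx.subgraph_centrality(sg)    # {node: centrality value}
center = max(scores, key=scores.get)   # node with the highest score
print(center, scores[center])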
def centrality(g):
    nx_g = pyzx2nx(g.copy())
    return nx.estrada_index(nx_g)
# This script returns the properties of a given .dot file
import sys

import networkx as nx

filename = sys.argv[1]

# nx.read_dot is no longer exposed at the top level; use the pydot-based reader
curNet = nx.Graph(nx.drawing.nx_pydot.read_dot(filename))

bc = nx.betweenness_centrality(curNet)
spl = nx.average_shortest_path_length(curNet)
maxbc = bc[max(bc, key=bc.get)]
minbc = bc[min(bc, key=bc.get)]
meanbc = sum(bc.values()) / len(bc)
estrada_i = nx.estrada_index(curNet)

print(str(spl) + ',' + str(maxbc) + ',' + str(minbc) + ',' +
      str(meanbc) + ',' + str(estrada_i))
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False):
    import scipy.io as sio
    import numpy as np
    import networkx as nx
    import pandas as pd
    import os

    Data = sio.loadmat(Mat_D)
    matX = Data['Correlation']  # [:tamn,:tamn]
    labels = Data['labels']
    print(np.shape(matX))
    print(np.shape(labels))
    print(np.min(matX), np.max(matX))

    if percentageConnections:
        if percentageConnections > 0 and percentageConnections < 1:
            for i in range(-100, 100):
                per = np.sum(matX > i / 100.) / np.size(matX)
                if per <= Threshold:
                    Threshold = i / 100.
                    break
            print(Threshold)
        else:
            print('The coefficient is out of range')

    # Edge list of the graph
    row, col = np.shape(matX)
    e = []
    for i in range(1, row):
        for j in range(i):
            if complet:
                e.append((labels[i], labels[j], matX[i, j]))
            else:
                if matX[i, j] > Threshold:
                    e.append((labels[i], labels[j], matX[i, j]))

    print(np.shape(e)[0], int(((row - 1) * row) / 2))

    # Build the graph
    G = nx.Graph()
    G.add_weighted_edges_from(e)
    labelNew = list(G.nodes)

    # Graph-level metrics (weighted)
    Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight')
    cluster = nx.average_clustering(G, weight='weight')

    # Unweighted
    estra = nx.estrada_index(G)
    tnsity = nx.transitivity(G)
    conNo = nx.average_node_connectivity(G)
    ac = nx.degree_assortativity_coefficient(G)

    # Node-level metrics
    tam = 15
    BoolCenV = False
    BoolLoad = False
    alpha = 0.1
    beta = 1.0

    katxCN = nx.katz_centrality_numpy(G, alpha=alpha, beta=beta, weight='weight')
    bcen = nx.betweenness_centrality(G, weight='weight')
    av_nd = nx.average_neighbor_degree(G, weight='weight')
    ctr = nx.clustering(G, weight='weight')
    ranPaN = nx.pagerank_numpy(G, weight='weight')
    Gol_N = nx.hits_numpy(G)
    Dgc = nx.degree_centrality(G)
    cl_ce = nx.closeness_centrality(G)
    cluster_Sq = nx.square_clustering(G)
    centr = nx.core_number(G)
    cami = nx.node_clique_number(G)
    camiN = nx.number_of_cliques(G)
    trian = nx.triangles(G)
    colorG = nx.greedy_color(G)

    try:
        cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight')
        tam = tam + 1
        BoolCenV = True
    except TypeError:
        print("The network is too small to compute this parameter")
    except:
        print('NetworkXPointlessConcept: graph null')

    if Threshold > 0:
        carga_cen = nx.load_centrality(G, weight='weight')  # positive weights
        BoolLoad = True
        tam = tam + 1

    # katxC = nx.katz_centrality(G, alpha=alpha, beta=beta, weight='weight')
    # cenV = nx.eigenvector_centrality(G, weight='weight')
    # Golp = nx.hits(G)
    # Gol_si = nx.hits_scipy(G)
    # ranPa = nx.pagerank(G, weight='weight')
    # ranPaS = nx.pagerank_scipy(G, weight='weight')

    matrix_datos = np.zeros((tam, np.shape(labelNew)[0]))
    print(np.shape(matrix_datos))

    lim = np.shape(labelNew)[0]
    for i in range(lim):
        roi = labelNew[i]
        # print(roi)
        matrix_datos[0, i] = katxCN[roi]
        matrix_datos[1, i] = bcen[roi]
        matrix_datos[2, i] = av_nd[roi]
        matrix_datos[3, i] = ctr[roi]
        matrix_datos[4, i] = ranPaN[roi]
        matrix_datos[5, i] = Gol_N[0][roi]
        matrix_datos[6, i] = Gol_N[1][roi]
        matrix_datos[7, i] = Dgc[roi]
        matrix_datos[8, i] = cl_ce[roi]
        matrix_datos[9, i] = cluster_Sq[roi]
        matrix_datos[10, i] = centr[roi]
        matrix_datos[11, i] = cami[roi]
        matrix_datos[12, i] = camiN[roi]
        matrix_datos[13, i] = trian[roi]
        matrix_datos[14, i] = colorG[roi]
        if BoolCenV:
            matrix_datos[15, i] = cenVNum[roi]
        if BoolLoad:
            # row 16 if eigenvector centrality is present, row 15 otherwise
            matrix_datos[15 + int(BoolCenV), i] = carga_cen[roi]
        # matrix_datos[0, i] = katxC[roi]
        # matrix_datos[2, i] = cenV[roi]
        # matrix_datos[7, i] = Golp[0][roi]
        # matrix_datos[9, i] = Gol_si[0][roi]
        # matrix_datos[10, i] = Golp[1][roi]
        # matrix_datos[12, i] = Gol_si[1][roi]
        # matrix_datos[22, i] = ranPa[roi]
        # matrix_datos[24, i] = ranPaS[roi]

    FuncName = [
        'degree_pearson_correlation_coefficient', 'average_clustering',
        'estrada_index', 'transitivity', 'average_node_connectivity',
        'degree_assortativity_coefficient', 'katz_centrality_numpy',
        'betweenness_centrality', 'average_neighbor_degree', 'clustering',
        'pagerank_numpy', 'hits_numpy0', 'hits_numpy1', 'degree_centrality',
        'closeness_centrality', 'square_clustering', 'core_number',
        'node_clique_number', 'number_of_cliques', 'triangles', 'greedy_color',
        'eigenvector_centrality_numpy', 'load_centrality'
    ]

    frame = pd.DataFrame(matrix_datos)
    frame.columns = labelNew
    # The first 6 names are graph-level metrics; node-level row labels start at index 6
    frame.index = FuncName[6:6 + tam]

    Resul = os.getcwd()
    out_data = Resul + '/graph_metrics.csv'
    out_mat = Resul + '/graph_metrics_global.mat'
    frame.to_csv(out_data)
    sio.savemat(
        out_mat, {
            FuncName[0]: Dpc,
            FuncName[1]: cluster,
            FuncName[2]: estra,
            FuncName[3]: tnsity,
            FuncName[4]: conNo,
            FuncName[5]: ac
        })

    return out_data, out_mat
def estgexf():
    global fpname
    indegree_list = []
    outdegree_list = []
    closeness_list = []
    betweenness_list = []
    users_list = []
    clustering_list = []

    file_path = fpname
    output_est = open(file_path + "_stats.txt", "w")
    print("node\tindegree\toutdegree\tcloseness\tbetweenness\tclustering\tcomponents\tdiameter",
          file=output_est)

    indegree = nx.in_degree_centrality(G)
    outdegree = nx.out_degree_centrality(G)
    closeness = nx.closeness_centrality(G)
    betweenness = nx.betweenness_centrality(G)
    ndG = G.to_undirected()
    clustering = nx.clustering(ndG)

    try:
        diameter = nx.diameter(G)
    except nx.NetworkXError:
        diameter = 1000  # sentinel value for a disconnected network

    for key, value in clustering.items():
        clustering_list.append(value)
    clusv = sum(clustering_list) / float(len(clustering_list))

    for key, value in indegree.items():
        indegree_list.append(value)
    indv = sum(indegree_list) / float(len(indegree_list))

    for key, value in outdegree.items():
        outdegree_list.append(value)
    outv = sum(outdegree_list) / float(len(outdegree_list))

    for key, value in closeness.items():
        closeness_list.append(value)
    closev = sum(closeness_list) / float(len(closeness_list))

    for key, value in betweenness.items():
        betweenness_list.append(value)
        users_list.append(key)
    betv = sum(betweenness_list) / float(len(betweenness_list))

    components_count = len(users_list)

    for i in users_list:
        pos = users_list.index(i)
        print(i, "\t", indegree_list[pos], "\t", outdegree_list[pos], "\t",
              closeness_list[pos], "\t", betweenness_list[pos], "\t",
              clustering_list[pos], "\t", components_count, "\t", diameter,
              file=output_est)
    output_est.close()

    ascoef = nx.degree_assortativity_coefficient(G)
    estr = nx.estrada_index(ndG)

    print("Network indicators for", fpname)
    print("")
    print("---------------------------------------")
    if diameter == 1000:
        print("Diameter: disconnected network")
    else:
        print("Diameter:", diameter)
    print("Average clustering:", clusv)
    print("Average Indegree centrality:", indv)
    print("Average Outdegree centrality:", outv)
    print("Average Closeness centrality:", closev)
    print("Average Betweenness centrality:", betv)
    print("Components count:", components_count)
    print("Assortativity coefficient:", ascoef)
    print("Estrada index:", estr)
    print("---------------------------------------")
    print("")
    return False
prev_connected_comp = []
for cx in pres_connected_component:
    prev_connected_comp.append(cx)

trajectory_to_node_map_prev = trajectory_to_node_map_pres
trajectory_to_node_map_pres = {}
# print("Shailja", R.nodes, R.edges)

for node in delete_nodes:
    if G.has_node(node):
        G.remove_node(node)

ind_list = [x + 1 for x in ind_list]
for j in range(len(traj_list)):
    if (ind_list[j] >= len(I.trajectories[traj_list[j]].points)):
        del_ind_tra.append(j)

traj_list = [i for j, i in enumerate(traj_list) if j not in del_ind_tra]
ind_list = [i for j, i in enumerate(ind_list) if j not in del_ind_tra]

print(trkname, len(streamlines), len(R.nodes()), len(R.edges()),
      nx.average_clustering(R), nx.estrada_index(R))
# except:
#     print("Error not handled")
def large_graph_metric(G, weight=None, thresh=None):
    # threshold graph edges based on weight
    if thresh is None:
        H = G
    else:
        if weight is None:
            raise ValueError('No weight is given for thresholding')
        rm_edges = []
        for edge in G.edges(data=True):  # edges_iter() was removed in NetworkX 2.x
            if edge[2]['weight'] < thresh:
                rm_edges.append([edge[0], edge[1]])
        H = G.copy()
        H.remove_edges_from(rm_edges)

    # create feature vector
    x = np.ones(13)

    # clustering coefficient
    x[0] = nx.average_clustering(H)

    # node connectivity
    x[1] = nx.node_connectivity(H)

    # average betweenness centrality
    # bc = nx.betweenness_centrality(G, weight=weight)
    bc = nx.betweenness_centrality(G)
    x[2] = float(sum(bc[node] for node in bc)) / len(bc)

    # average closeness centrality
    # cc = nx.closeness_centrality(G, weight=weight)
    cc = nx.closeness_centrality(G)
    x[3] = float(sum(cc[node] for node in cc)) / len(cc)

    # Estrada index
    x[4] = nx.estrada_index(H)

    # average degree
    x[5] = float(sum(len(H[u]) for u in H)) / len(H)

    # standard deviation of the degree
    # x[6] = float(sum((x[4] - len(H[u]))**2)) / len(H)

    # degree assortativity coefficient
    # x[7] = nx.degree_assortativity_coefficient(G, weight=weight)
    x[7] = nx.degree_assortativity_coefficient(G)

    # transitivity
    x[8] = nx.transitivity(H)

    # diameter
    x[9] = nx.diameter(H)

    # eccentricity
    # x[10] = nx.eccentricity(H)

    # periphery
    # x[11] = nx.periphery(H)

    # radius
    x[12] = nx.radius(H)

    return x
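# A minimal usage sketch of large_graph_metric. Assumptions: numpy as np and
# networkx as nx are imported at module level as the function requires; the
# edge-weight attribute and threshold below are illustrative only.
if __name__ == "__main__":
    G_demo = nx.erdos_renyi_graph(30, 0.3, seed=1)
    for u, v in G_demo.edges():
        G_demo[u][v]['weight'] = 1.0       # hypothetical edge weights
    features = large_graph_metric(G_demo, weight='weight', thresh=0.5)
    print(features)                        # 13-dimensional feature vector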
count = 1
l = []
for ind, row in df_cd.iterrows():
    fcode = row['STATECD']
    st = row['STATEFP']
    # print(fcode[0:2] + ' ' + fcode[2:] + ' || ' + str(count) + ' of ' + str(436))
    count += 1

    # open matrix
    if st == state or state == 'all':
        adj = np.loadtxt('adj_mats/' + fcode[0:2] + '_' + fcode + '_dist.txt',
                         delimiter=',').astype(int)
        # G = nx.path_graph(500)
        G = nx.from_numpy_matrix(adj)
        spec = nx.estrada_index(G) / len(G.nodes)  # /adj.shape[0]
        '''
        deg = np.diag(adj.sum(axis=0)).astype(int)
        lap = deg - adj
        # print(lap.shape)
        lap = lap.astype(int)
        m = lap
        spec = np.real(scipy.linalg.eigvals(m).tolist())
        # print(np.sum(adj))
        spec.sort()
        # print(spec[0:6])
        spec = [np.log(x) for x in spec if x > 0]