def average_betweenness_centrality(G, agent=None, connected_only=True):
    # Betweenness centrality of a node v is the sum of the fraction of
    # all-pairs shortest paths that pass through v.
    # Only densely connected hidden nodes can have positive betweenness centrality.
    HBC = betweenness_centrality(G)
    if connected_only:
        cm = np.array(nx.adjacency_matrix(G).todense())
        num_hidden = number_of_densely_connected_nodes(cm)
    else:
        # agent must be supplied when connected_only is False
        num_hidden = len(agent.hidden_ixs)
    avHBC = sum(HBC[b] for b in HBC) / num_hidden
    return avHBC
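A minimal usage sketch of the same idea, averaging betweenness over a designated node subset; `hidden_ixs` below is a hypothetical stand-in for `agent.hidden_ixs` and is not part of the code above:

import networkx as nx
from networkx.algorithms.centrality import betweenness_centrality

G = nx.erdos_renyi_graph(20, 0.3, seed=1)
hidden_ixs = [2, 5, 7]                      # hypothetical hidden-node indices
bc = betweenness_centrality(G)
avg_hidden_bc = sum(bc[v] for v in hidden_ixs) / len(hidden_ixs)
print(avg_hidden_bc)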
def compute_stats(df_fname, dist_mat=None):
    df = pd.read_hdf(df_fname)
    # Stats organized as follows:
    #   name : lambda/function to be called on (g)
    dists = np.loadtxt(dist_mat)
    stats = {
        "edgecount": nxf.number_of_edges,
        "globaleffic": nxa.global_efficiency,
        "degree": lambda g: dict(nxf.degree(g)).values(),
        "modularity": lambda g: nntm.consensus_modularity(
            nx.adjacency_matrix(g).toarray(), seed=42)[1].mean(),
        "assort": nxa.assortativity.degree_assortativity_coefficient,
        "avplength": lambda g: np.mean(connection_length(g, dists=dists)),
        "weight": lambda g: list(nxf.get_edge_attributes(g, 'weight').values()),
        "ccoeff": lambda g: list(nxa.clustering(g, weight=None).values()),
        "betweenness": lambda g: list(
            nxa.betweenness_centrality(g, weight='weight').values()),
        "plength": lambda g: connection_length(g, dists=dists)
    }

    # Create one dict (and eventually one DataFrame) column per stat:
    stat_results = {'index': []}
    stat_results.update({stat_name: [] for stat_name in stats.keys()})

    for idx, row in df.iterrows():
        tmpg = nx.Graph(row.graph)
        stat_results['index'] += [row['index']]
        for stat_name, stat_fn in stats.items():
            tmps = stat_fn(tmpg)
            try:
                # Sequence-valued stats are stored as arrays, scalars as-is.
                len(tmps)
                stat_results[stat_name] += [np.array(list(tmps))]
            except TypeError:
                stat_results[stat_name] += [tmps]

    stat_df = pd.DataFrame.from_dict(stat_results)
    stat_df.to_hdf(df_fname.replace('.h5', '_stats.h5'), "stats", mode="w")
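A hypothetical invocation, assuming an HDF5 store whose rows carry an 'index' column and a 'graph' column accepted by nx.Graph(), plus a whitespace-delimited distance matrix read by np.loadtxt; all file names here are made up for illustration:

import networkx as nx
import numpy as np
import pandas as pd

# two toy graphs stored the way compute_stats reads them
df = pd.DataFrame({'index': [0, 1],
                   'graph': [nx.erdos_renyi_graph(10, 0.3),
                             nx.erdos_renyi_graph(10, 0.5)]})
df.to_hdf('graphs.h5', 'graphs', mode='w')
np.savetxt('dists.txt', np.random.rand(10, 10))
compute_stats('graphs.h5', dist_mat='dists.txt')   # writes graphs_stats.h5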
def calculate_betweenness_centrality(self):
    G = self._graph
    self._betweenness_centralities = nxa.betweenness_centrality(G)
from networkx.algorithms import betweenness_centrality
from utils import build_complete_graph

dep_graph = build_complete_graph()
betw = betweenness_centrality(dep_graph)
betw = sorted(betw.items(), key=lambda x: x[1], reverse=True)

# print the top packages by betweenness
num_packages = 13
for package, betw_score in betw[:num_packages]:
    print(f"{package}, {betw_score:.6f}")
def getBetweenessCentrality(self, G):
    # Thin wrapper around networkx's betweenness_centrality.
    return nalgos.betweenness_centrality(G)
    # (excerpt begins inside a branch that computes exact betweenness with SNAP)
    V = snap.TIntFltH()
    E = snap.TIntPrFltH()
    snap.GetBetweennessCentr(G_snap, V, E, 1.0)
    values = []
    for i in V:
        values.append(V[i])
    n = len(values)
    betweenness_coef = sum(values) / ((n - 1) * (n - 2))
    report += ["Average betweenness coefficient: %f" % float(betweenness_coef)]
    print(report[-1])
else:
    # Clustering coefficient
    average_clustering_coef = nx.average_clustering(G)
    report += ["Average clustering coefficient: %f" % float(average_clustering_coef)]
    print(report[-1])

    # betweenness_centrality returns a dict {node: centrality};
    # sum its values, not its keys
    values = algos.betweenness_centrality(G)
    n = len(values)
    betweenness_coef = sum(values.values()) / ((n - 1) * (n - 2))
    report += ["Average betweenness coefficient: %f" % float(betweenness_coef)]
    print(report[-1])

# Cache the Laplacian eigendecomposition for faster recomputation
if not (os.path.exists("outputs/q4/intermediates/vecs.npy")
        and os.path.exists("outputs/q4/intermediates/vals.npy")):
    laplacian_matrix = nx.linalg.laplacian_matrix(G).todense()
    vals, vecs = np.linalg.eigh(laplacian_matrix)
    np.save('outputs/q4/intermediates/vecs.npy', vecs)
    np.save('outputs/q4/intermediates/vals.npy', vals)
else:
    vecs = np.load('outputs/q4/intermediates/vecs.npy')
    vals = np.load('outputs/q4/intermediates/vals.npy')
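As a side check (a sketch, not from the snippet above): networkx can normalize betweenness itself. For an undirected graph, normalized=True multiplies each raw value by 2/((n-1)(n-2)), so the manual average above and the mean of the normalized values differ only by a factor of 2/n:

import networkx as nx
from networkx import algorithms as algos

G = nx.karate_club_graph()
n = G.number_of_nodes()
raw = algos.betweenness_centrality(G, normalized=False)
manual = sum(raw.values()) / ((n - 1) * (n - 2))
norm_mean = sum(algos.betweenness_centrality(G, normalized=True).values()) / n
assert abs(norm_mean - 2 * manual / n) < 1e-12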
'''
A random graph can be generated using, for example, the Erdos-Renyi algorithm.
'''
G = nx.erdos_renyi_graph(100, 0.1)

# draw the network
nx.draw(G)

# degree centrality
degree = degree_centrality(G)

# eigenvector centrality
ec = eigenvector_centrality(G)

# betweenness centrality
bc = betweenness_centrality(G)

# visualize results
# --+ df
df = pd.DataFrame({'degree': degree,
                   'eigenvector_centrality': ec,
                   'betweenness_centrality': bc})

# --+ correlation matrix
df.corr()

# --+ scatter plot matrix
sns.pairplot(df)

# %% node centrality - case B, small world network

# generator
'''
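The excerpt cuts off as case B begins. A plausible continuation (an assumption, not the original text), reusing the imports above, would generate a small-world graph with the Watts-Strogatz model and repeat the same centralities:

G = nx.watts_strogatz_graph(n=100, k=4, p=0.1)
nx.draw(G)
degree = degree_centrality(G)
ec = eigenvector_centrality(G)
bc = betweenness_centrality(G)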
def extractnetstats(ID, network, thr, conn_model, est_path, mask, out_file=None):
    from pynets import thresholding, utils

    pruning = True

    # Load and threshold matrix
    in_mat = np.array(np.genfromtxt(est_path))
    in_mat = thresholding.autofix(in_mat)

    # Normalize connectivity matrix (weights between 0-1)
    in_mat = thresholding.normalize(in_mat)

    # Get hyperbolic tangent of matrix if non-sparse (i.e. Fisher r-to-z transform)
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)
        in_mat[np.isnan(in_mat)] = 0
        in_mat[np.isinf(in_mat)] = 1

    # Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    # Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    # Prune irrelevant nodes (i.e. nodes that are fully disconnected from the graph
    # and/or whose betweenness centrality is > 3 standard deviations below the mean)
    if pruning is True:
        [G_pruned, _, _] = most_important(G_pre)
    else:
        G_pruned = G_pre

    # Make directed if sparse
    if conn_model != 'corr' and conn_model != 'cov' and conn_model != 'tangent':
        G_di = nx.DiGraph(G_pruned)
        G_dir = G_di.to_directed()
        G = G_pruned
    else:
        G = G_pruned

    # Get corresponding matrix
    in_mat = nx.to_numpy_array(G)

    # Print graph summary
    print('\n\nThreshold: ' + str(thr))
    print('Source File: ' + str(est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    try:
        G_dir
        print('Analyzing DIRECTED graph when applicable...')
    except:
        print('Graph is UNDIRECTED')

    if conn_model == 'corr' or conn_model == 'cov' or conn_model == 'tangent':
        if nx.is_connected(G) is True:
            num_conn_comp = nx.number_connected_components(G)
            print('Graph is CONNECTED with ' + str(num_conn_comp) +
                  ' connected component(s)')
        else:
            print('Graph is DISCONNECTED')
    print('\n')

    # Create length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')

    # Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    # Save G as a graphml (gephi-compatible) file
    if mask:
        if network:
            nx.write_graphml(G, dir_path + '/' + ID + '_' + network + '_' +
                             str(os.path.basename(mask).split('.')[0]) + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '_' +
                             str(os.path.basename(mask).split('.')[0]) + '.graphml')
    else:
        if network:
            nx.write_graphml(G, dir_path + '/' + ID + '_' + network + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    from networkx.algorithms import degree_assortativity_coefficient, \
        average_clustering, average_shortest_path_length, \
        degree_pearson_correlation_coefficient, graph_number_of_cliques, \
        transitivity, betweenness_centrality, eigenvector_centrality, \
        communicability_betweenness_centrality, clustering, degree_centrality, \
        rich_club_coefficient
    from pynets.netstats import average_local_efficiency, global_efficiency, \
        local_efficiency, modularity_louvain_dir, smallworldness, \
        core_periphery_dir, average_shortest_path_length_for_all

    # For non-nodal scalar metrics from custom functions, add the name of the
    # function to metric_list and add the function (with a G-only input) to the
    # netstats module.
    metric_list = [global_efficiency, average_local_efficiency, smallworldness,
                   degree_assortativity_coefficient, average_clustering,
                   average_shortest_path_length,
                   degree_pearson_correlation_coefficient,
                   graph_number_of_cliques, transitivity]

    # Custom weight parameter
    #custom_weight = 0.25
    custom_weight = None

    # Iteratively run functions from the above metric list that generate a
    # single scalar output
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        net_met = met_name
        try:
            if met_name == 'average_shortest_path_length':
                try:
                    try:
                        net_met_val = float(i(G_dir))
                        print('Calculating from directed graph...')
                    except:
                        net_met_val = float(i(G))
                except:
                    # Case where G is not fully connected
                    net_met_val = float(average_shortest_path_length_for_all(G))
            elif custom_weight is not None and met_name in (
                    'degree_assortativity_coefficient', 'global_efficiency',
                    'average_local_efficiency', 'average_clustering'):
                try:
                    net_met_val = float(i(G_dir, weight=custom_weight))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G, weight=custom_weight))
            else:
                try:
                    net_met_val = float(i(G_dir))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1

    net_met_val_list = list(net_met_arr[:, 1])

    # Run miscellaneous functions that generate multiple outputs

    # Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity_louvain_dir(in_mat)

    # Calculate core-periphery subdivision
    [Coreness_vec, Coreness_q] = core_periphery_dir(in_mat)

    # Local efficiency
    try:
        try:
            le_vector = local_efficiency(G_dir)
        except:
            le_vector = local_efficiency(G)
        print('\nExtracting Local Efficiency vector for all network nodes...')
        le_vals = list(le_vector.values())
        le_nodes = list(le_vector.keys())
        num_nodes = len(le_nodes)
        le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            le_arr[j, 0] = str(le_nodes[j]) + '_local_efficiency'
            try:
                le_arr[j, 1] = le_vals[j]
            except:
                le_arr[j, 1] = np.nan
        le_arr[num_nodes, 0] = 'MEAN_local_efficiency'
        nonzero_arr_le = np.delete(le_arr[:, 1], [0])
        le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
        print('Mean Local Efficiency across nodes: ' + str(le_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Local clustering
    try:
        cl_vector = clustering(G)
        print('\nExtracting Local Clustering vector for all network nodes...')
        cl_vals = list(cl_vector.values())
        cl_nodes = list(cl_vector.keys())
        num_nodes = len(cl_nodes)
        cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            cl_arr[j, 0] = str(cl_nodes[j]) + '_local_clustering'
            try:
                cl_arr[j, 1] = cl_vals[j]
            except:
                cl_arr[j, 1] = np.nan
        cl_arr[num_nodes, 0] = 'MEAN_local_clustering'
        nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
        cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
        print('Mean Local Clustering across nodes: ' + str(cl_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Degree centrality
    try:
        try:
            dc_vector = degree_centrality(G_dir)
        except:
            dc_vector = degree_centrality(G)
        print('\nExtracting Degree Centrality vector for all network nodes...')
        dc_vals = list(dc_vector.values())
        dc_nodes = list(dc_vector.keys())
        num_nodes = len(dc_nodes)
        dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            dc_arr[j, 0] = str(dc_nodes[j]) + '_degree_centrality'
            try:
                dc_arr[j, 1] = dc_vals[j]
            except:
                dc_arr[j, 1] = np.nan
        dc_arr[num_nodes, 0] = 'MEAN_degree_centrality'
        nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
        dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
        print('Mean Degree Centrality across nodes: ' + str(dc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Betweenness centrality, computed on the length matrix because weighted
    # shortest paths interpret weights as distances
    try:
        bc_vector = betweenness_centrality(G_len, normalized=True)
        print('\nExtracting Betweenness Centrality vector for all network nodes...')
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            bc_arr[j, 0] = str(bc_nodes[j]) + '_betweenness_centrality'
            try:
                bc_arr[j, 1] = bc_vals[j]
            except:
                bc_arr[j, 1] = np.nan
        bc_arr[num_nodes, 0] = 'MEAN_betw_cent'
        nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
        bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
        print('Mean Betweenness Centrality across nodes: ' +
              str(bc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Eigenvector centrality
    try:
        try:
            ec_vector = eigenvector_centrality(G_dir, max_iter=1000)
        except:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
        print('\nExtracting Eigenvector Centrality vector for all network nodes...')
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            ec_arr[j, 0] = str(ec_nodes[j]) + '_eigenvector_centrality'
            try:
                ec_arr[j, 1] = ec_vals[j]
            except:
                ec_arr[j, 1] = np.nan
        ec_arr[num_nodes, 0] = 'MEAN_eig_cent'
        nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
        ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
        print('Mean Eigenvector Centrality across nodes: ' +
              str(ec_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Communicability centrality
    try:
        cc_vector = communicability_betweenness_centrality(G, normalized=True)
        print('\nExtracting Communicability Centrality vector for all network nodes...')
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            cc_arr[j, 0] = str(cc_nodes[j]) + '_communicability_centrality'
            try:
                cc_arr[j, 1] = cc_vals[j]
            except:
                cc_arr[j, 1] = np.nan
        cc_arr[num_nodes, 0] = 'MEAN_comm_cent'
        nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
        cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
        print('Mean Communicability Centrality across nodes: ' +
              str(cc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    # Rich club coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print('\nExtracting Rich Club Coefficient vector for all network nodes...')
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        for j in range(num_edges):
            rc_arr[j, 0] = str(rc_edges[j]) + '_rich_club'
            try:
                rc_arr[j, 1] = rc_vals[j]
            except:
                rc_arr[j, 1] = np.nan
        # Add mean
        rc_arr[num_edges, 0] = 'MEAN_rich_club'
        nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
        rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
        print('Mean Rich Club Coefficient across edges: ' +
              str(rc_arr[num_edges, 1]))
        print('\n')
    except:
        pass

    # Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    # Add modularity measure
    try:
        metric_list_names.append('Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    # Add core/periphery measure
    try:
        metric_list_names.append('Coreness')
        net_met_val_list_final.append(Coreness_q)
    except:
        pass

    # Add local efficiency measures
    try:
        for i in le_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1])
    except:
        pass

    # Add local clustering measures
    try:
        for i in cl_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1])
    except:
        pass

    # Add centrality measures
    try:
        for i in dc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
    except:
        pass
    try:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
    except:
        pass
    try:
        for i in ec_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
    except:
        pass
    try:
        for i in cc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
    except:
        pass

    # Add rich club measure
    try:
        for i in rc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
    except:
        pass

    # Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle
    if mask is not None:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + \
                '/net_metric_list_' + network + '_' + \
                str(os.path.basename(mask).split('.')[0])
        else:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + \
                '/net_metric_list_' + str(os.path.basename(mask).split('.')[0])
    else:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + \
                '/net_metric_list_' + network
        else:
            met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + \
                '/net_metric_list'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    # And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, mask, dir_path)
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
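Why the betweenness call above is made on G_len rather than G: networkx interprets edge weights as distances when computing weighted shortest paths, so strong connections must first be converted to short lengths. A minimal illustration with an assumed 1/w conversion (pynets' weight_conversion may use a different formula):

import networkx as nx

G = nx.Graph()
G.add_edge(0, 1, weight=0.9)   # strong connection
G.add_edge(1, 2, weight=0.1)   # weak connection
G_len = G.copy()
for u, v, d in G_len.edges(data=True):
    d['weight'] = 1.0 / d['weight']   # connection weight -> path length
print(nx.betweenness_centrality(G_len, weight='weight'))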
def extractnetstats(est_path, ID, NETWORK, thr, sps_model, out_file=None):
    in_mat = np.array(genfromtxt(est_path))

    def threshold_proportional(in_mat, thr):
        # number of nodes
        n = len(in_mat)
        # clear diagonal
        np.fill_diagonal(in_mat, 0)
        if np.allclose(in_mat, in_mat.T):
            # symmetric matrix: work on the upper triangle only and halve the
            # number of removed links
            in_mat[np.tril_indices(n)] = 0
            ud = 2
        else:
            ud = 1
        # find all links
        ind = np.where(in_mat)
        # sort indices by magnitude
        I = np.argsort(in_mat[ind])[::-1]
        # number of links to be preserved
        en = int(round((n * n - n) * float(thr) / ud))
        # apply threshold
        in_mat[(ind[0][I][en:], ind[1][I][en:])] = 0
        if ud == 2:
            # reconstruct symmetry
            in_mat[:, :] = in_mat + in_mat.T
        return in_mat

    in_mat = threshold_proportional(in_mat, thr)

    # Get hyperbolic tangent of graph if non-sparse (i.e. Fisher r-to-z
    # transform), and divide by the variance of the matrix
    if sps_model is False:
        in_mat = np.arctanh(in_mat) / np.var(in_mat)

    dir_path = os.path.dirname(os.path.realpath(est_path))
    G = nx.from_numpy_matrix(in_mat)

    ###############################################################
    ############Calculate graph metrics from graph G###############
    ###############################################################
    from networkx.algorithms import degree_assortativity_coefficient, \
        average_clustering, average_shortest_path_length, \
        degree_pearson_correlation_coefficient, graph_number_of_cliques, \
        rich_club_coefficient, transitivity, betweenness_centrality
    #from networkx.algorithms.matching import min_maximal_matching
    from itertools import permutations
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle

    def efficiency(G, u, v):
        return float(1) / nx.shortest_path_length(G, u, v)

    def global_efficiency(G):
        n = len(G)
        denom = n * (n - 1)
        return float(sum(efficiency(G, u, v)
                         for u, v in permutations(G, 2))) / denom

    def local_efficiency(G):
        return float(sum(global_efficiency(nx.ego_graph(G, v))
                         for v in G)) / len(G)

    metric_list = [global_efficiency, local_efficiency,
                   degree_assortativity_coefficient, average_clustering,
                   average_shortest_path_length,
                   degree_pearson_correlation_coefficient,
                   graph_number_of_cliques, rich_club_coefficient, transitivity]

    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        net_met = i.__name__
        try:
            net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        j = j + 1

    if NETWORK is not None:
        bc_vector = betweenness_centrality(G)
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes, 2], dtype='object')
        for j in range(num_nodes):
            bc_arr[j, 0] = NETWORK + '_' + str(bc_nodes[j]) + '_bet_cent'
            bc_arr[j, 1] = bc_vals[j]
            print(NETWORK + '_' + str(bc_nodes[j]))
            print(str(bc_vals[j]))
        net_met_val_list = list(net_met_arr[:, 1]) + list(bc_arr[:, 1])
    else:
        net_met_val_list = list(net_met_arr[:, 1])

    metric_list_names = []
    for i in metric_list:
        metric_list_names.append(i.__name__)
    if NETWORK is not None:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)

    # Save metric names as pickle
    met_list_pickle_path = os.path.dirname(os.path.abspath(est_path)) + \
        '/met_list_pickle'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    ###############################################################
    # Save results to csv
    if 'inv' in est_path:
        if NETWORK is not None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_inv_sps_cov.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_inv_sps_cov.csv'
    else:
        if NETWORK is not None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_corr.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_corr.csv'
    np.savetxt(out_path, net_met_val_list)

    return out_path
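A toy check of the proportional-thresholding logic above (a sketch; it assumes the nested threshold_proportional helper has been hoisted to module scope). Keeping the strongest 50% of links in a 3-node symmetric matrix should remove only the weakest edge:

import numpy as np

W = np.array([[0.0, 0.8, 0.1],
              [0.8, 0.0, 0.5],
              [0.1, 0.5, 0.0]])
W_thr = threshold_proportional(W.copy(), 0.5)
# the weakest link (0, 2) is zeroed; the two strongest survive
print(W_thr)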
def extractnetstats(ID, NETWORK, thr, conn_model, est_path1, out_file=None):
    import pynets
    from pynets import netstats, thresholding

    # Load and threshold matrix
    in_mat = np.array(genfromtxt(est_path1))
    in_mat = thresholding.autofix(in_mat)

    # Get hyperbolic tangent of matrix if non-sparse (i.e. Fisher r-to-z transform)
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)

    # Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path1))

    # Assign weight matrix
    mat_wei = in_mat
    # Load numpy matrix as networkx graph
    G = nx.from_numpy_matrix(mat_wei)

    # Create binary matrix
    #mat_bin = weight_conversion(in_mat, 'binarize')
    # Load numpy matrix as networkx graph
    #G_bin = nx.from_numpy_matrix(mat_bin)

    # Create length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    # Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    # Save gephi files
    if NETWORK is not None:
        nx.write_graphml(G, dir_path + '/' + ID + '_' + NETWORK + '.graphml')
    else:
        nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    import random
    import itertools
    from itertools import permutations
    from networkx.algorithms import degree_assortativity_coefficient, \
        average_clustering, average_shortest_path_length, \
        degree_pearson_correlation_coefficient, graph_number_of_cliques, \
        transitivity, betweenness_centrality, rich_club_coefficient, \
        eigenvector_centrality, communicability_centrality
    from pynets.netstats import efficiency, global_efficiency, \
        local_efficiency, create_random_graph, smallworldness_measure, \
        smallworldness, modularity

    # For non-nodal scalar metrics from the networkx.algorithms library, add
    # the name of the function to metric_list for it to be automatically
    # calculated. For non-nodal scalar metrics from custom functions, add the
    # name of the function to metric_list and add the function (with a G-only
    # input) to the netstats module.
    #metric_list = [global_efficiency, local_efficiency, smallworldness, degree_assortativity_coefficient, average_clustering, average_shortest_path_length, degree_pearson_correlation_coefficient, graph_number_of_cliques, transitivity]
    metric_list = [global_efficiency, local_efficiency,
                   degree_assortativity_coefficient, average_clustering,
                   average_shortest_path_length,
                   degree_pearson_correlation_coefficient,
                   graph_number_of_cliques, transitivity]

    # Iteratively run functions from the above metric list
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        if NETWORK is not None:
            net_met = NETWORK + '_' + met_name
        else:
            net_met = met_name
        try:
            net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    # Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity(mat_wei)

    # betweenness_centrality, on the length matrix: weighted shortest paths
    # interpret weights as distances
    try:
        bc_vector = betweenness_centrality(G_len)
        print('Extracting Betweenness Centrality vector for all network nodes...')
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            if NETWORK is not None:
                bc_arr[j, 0] = NETWORK + '_' + str(bc_nodes[j]) + '_betw_cent'
                print('\n' + NETWORK + '_' + str(bc_nodes[j]) + '_betw_cent')
            else:
                bc_arr[j, 0] = 'WholeBrain_' + str(bc_nodes[j]) + '_betw_cent'
                print('\n' + 'WholeBrain_' + str(bc_nodes[j]) + '_betw_cent')
            try:
                bc_arr[j, 1] = bc_vals[j]
            except:
                bc_arr[j, 1] = np.nan
            print(str(bc_vals[j]))
        bc_val_list = list(bc_arr[:, 1])
        bc_arr[num_nodes, 0] = (NETWORK if NETWORK is not None
                                else 'WholeBrain') + '_MEAN_betw_cent'
        nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
        bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
        print('\n' + 'Mean Betweenness Centrality across all nodes: ' +
              str(bc_arr[num_nodes, 1]) + '\n')
    except:
        print('Betweenness Centrality calculation failed. Skipping...')
        bc_val_list = []

    # eigenvector_centrality
    try:
        ec_vector = eigenvector_centrality(G_len)
        print('Extracting Eigenvector Centrality vector for all network nodes...')
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            if NETWORK is not None:
                ec_arr[j, 0] = NETWORK + '_' + str(ec_nodes[j]) + '_eig_cent'
                print('\n' + NETWORK + '_' + str(ec_nodes[j]) + '_eig_cent')
            else:
                ec_arr[j, 0] = 'WholeBrain_' + str(ec_nodes[j]) + '_eig_cent'
                print('\n' + 'WholeBrain_' + str(ec_nodes[j]) + '_eig_cent')
            try:
                ec_arr[j, 1] = ec_vals[j]
            except:
                ec_arr[j, 1] = np.nan
            print(str(ec_vals[j]))
        ec_val_list = list(ec_arr[:, 1])
        ec_arr[num_nodes, 0] = (NETWORK if NETWORK is not None
                                else 'WholeBrain') + '_MEAN_eig_cent'
        nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
        ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
        print('\n' + 'Mean Eigenvector Centrality across all nodes: ' +
              str(ec_arr[num_nodes, 1]) + '\n')
    except:
        print('Eigenvector Centrality calculation failed. Skipping...')
        ec_val_list = []

    # communicability_centrality
    try:
        cc_vector = communicability_centrality(G_len)
        print('Extracting Communicability Centrality vector for all network nodes...')
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        for j in range(num_nodes):
            if NETWORK is not None:
                cc_arr[j, 0] = NETWORK + '_' + str(cc_nodes[j]) + '_comm_cent'
                print('\n' + NETWORK + '_' + str(cc_nodes[j]) + '_comm_cent')
            else:
                cc_arr[j, 0] = 'WholeBrain_' + str(cc_nodes[j]) + '_comm_cent'
                print('\n' + 'WholeBrain_' + str(cc_nodes[j]) + '_comm_cent')
            try:
                cc_arr[j, 1] = cc_vals[j]
            except:
                cc_arr[j, 1] = np.nan
            print(str(cc_vals[j]))
        cc_val_list = list(cc_arr[:, 1])
        cc_arr[num_nodes, 0] = (NETWORK if NETWORK is not None
                                else 'WholeBrain') + '_MEAN_comm_cent'
        nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
        cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
        print('\n' + 'Mean Communicability Centrality across all nodes: ' +
              str(cc_arr[num_nodes, 1]) + '\n')
    except:
        print('Communicability Centrality calculation failed. Skipping...')
        cc_val_list = []

    # rich_club_coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print('Extracting Rich Club Coefficient vector for all network nodes...')
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        for j in range(num_edges):
            if NETWORK is not None:
                rc_arr[j, 0] = NETWORK + '_' + str(rc_edges[j]) + '_rich_club'
                print('\n' + NETWORK + '_' + str(rc_edges[j]) + '_rich_club')
            else:
                rc_arr[j, 0] = 'WholeBrain_' + str(rc_edges[j]) + '_rich_club'
                print('\n' + 'WholeBrain_' + str(rc_edges[j]) + '_rich_club')
            try:
                rc_arr[j, 1] = rc_vals[j]
            except:
                rc_arr[j, 1] = np.nan
            print(str(rc_vals[j]))
        # Add mean
        rc_val_list = list(rc_arr[:, 1])
        rc_arr[num_edges, 0] = (NETWORK if NETWORK is not None
                                else 'WholeBrain') + '_MEAN_rich_club'
        nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
        rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
        print('\n' + 'Mean Rich Club Coefficient across all edges: ' +
              str(rc_arr[num_edges, 1]) + '\n')
    except:
        print('Rich Club calculation failed. Skipping...')
        rc_val_list = []

    # Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    # Add modularity measure
    try:
        if NETWORK is not None:
            metric_list_names.append(NETWORK + '_Modularity')
        else:
            metric_list_names.append('WholeBrain_Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    # Add centrality and rich club measures
    try:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
    except:
        pass
    try:
        for i in ec_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
    except:
        pass
    try:
        for i in cc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
    except:
        pass
    try:
        for i in rc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
    except:
        pass

    # Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle
    if NETWORK is not None:
        met_list_pickle_path = os.path.dirname(os.path.abspath(est_path1)) + \
            '/met_list_pickle_' + NETWORK
    else:
        met_list_pickle_path = os.path.dirname(os.path.abspath(est_path1)) + \
            '/met_list_pickle_WB'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    # Save results to csv
    if 'inv' in est_path1:
        if NETWORK is not None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_sps_cov_' + str(thr) + '.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_sps_cov_' + str(thr) + '.csv'
    else:
        if NETWORK is not None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_corr_' + str(thr) + '.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_corr_' + str(thr) + '.csv'
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
def extractnetstats(ID, network, thr, conn_model, est_path, roi, prune, node_size,
                    norm, binary, custom_weight=None):
    """
    Function interface for performing fully-automated graph analysis.

    Parameters
    ----------
    ID : str
        A subject id or other unique identifier.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default')
        used to filter nodes in the study of brain subgraphs.
    thr : float
        The value, between 0 and 1, used to threshold the graph using any
        variety of methods triggered through other options.
    conn_model : str
        Connectivity estimation model (e.g. corr for correlation, cov for
        covariance, sps for precision covariance, partcorr for partial
        correlation). sps type is used by default.
    est_path : str
        File path to the thresholded graph, conn_matrix_thr, saved as a numpy
        array in .npy format.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    prune : int
        Indicates whether to prune the final graph: 0 = no pruning,
        1 = prune disconnected nodes/isolates, 2 = retain only the most
        important nodes.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    custom_weight : float
        The edge attribute that holds the numerical value used as a weight.
        If None, then each edge has weight 1. Default is None.

    Returns
    -------
    out_path : str
        Path to .csv file where graph analysis results are saved.
    """
    import pandas as pd
    import yaml
    try:
        import cPickle as pickle
    except ImportError:
        import _pickle as pickle
    from pathlib import Path
    from pynets import thresholding, utils

    # Advanced options
    fmt = 'edgelist_ssv'
    est_path_fmt = "%s%s" % ('.', est_path.split('.')[-1])

    # Load and threshold matrix
    if est_path_fmt == '.txt':
        in_mat_raw = np.array(np.genfromtxt(est_path))
    else:
        in_mat_raw = np.array(np.load(est_path))

    # Zero the diagonal and autofix
    in_mat = np.array(np.array(thresholding.autofix(in_mat_raw)))

    # Normalize connectivity matrix
    if norm == 1:
        # Force edges to values between 0-1
        in_mat = thresholding.normalize(in_mat)
    elif norm == 2:
        # Apply log10
        in_mat = np.log10(in_mat)
    else:
        pass

    # Correct nan's and inf's
    in_mat[np.isnan(in_mat)] = 0
    in_mat[np.isinf(in_mat)] = 1

    # Get hyperbolic tangent (i.e. Fisher r-to-z transform) of matrix if
    # non-covariance
    if (conn_model == 'corr') or (conn_model == 'partcorr'):
        in_mat = np.arctanh(in_mat)

    # Binarize graph
    if binary is True:
        in_mat = thresholding.binarize(in_mat)

    # Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    # Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    # Prune irrelevant nodes (i.e. nodes that are fully disconnected from the
    # graph and/or those whose betweenness centrality is > 3 standard
    # deviations below the mean)
    if prune == 1:
        [G, _] = prune_disconnected(G_pre)
    elif prune == 2:
        [G, _] = most_important(G_pre)
    else:
        G = G_pre

    # Get corresponding matrix
    in_mat = np.array(nx.to_numpy_matrix(G))

    # Save pruned matrix
    if (prune != 0) and (prune is not None):
        final_mat_path = "%s%s%s" % (est_path.split(est_path_fmt)[0],
                                     '_pruned_mat', est_path_fmt)
        utils.save_mat(in_mat, final_mat_path, fmt)

    # Print graph summary
    print("%s%.2f%s" % ('\n\nThreshold: ', 100 * float(thr), '%'))
    print("%s%s" % ('Source File: ', est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    if nx.is_connected(G) is True:
        frag = False
        print('Graph is connected...')
    else:
        frag = True
        print('Warning: Graph is fragmented...\n')

    # Create length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')

    # Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # # Calculate global and local metrics from graph G
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    import community
    from networkx.algorithms import degree_assortativity_coefficient, \
        average_clustering, average_shortest_path_length, \
        degree_pearson_correlation_coefficient, graph_number_of_cliques, \
        transitivity, betweenness_centrality, eigenvector_centrality, \
        communicability_betweenness_centrality, clustering, degree_centrality, \
        rich_club_coefficient, sigma
    from pynets.stats.netstats import average_local_efficiency, \
        global_efficiency, local_efficiency, participation_coef, \
        participation_coef_sign, diversity_coef_sign

    # For non-nodal scalar metrics from custom functions, add the name of the
    # function to metric_list and add the function (with a G-only input) to
    # the netstats module.
    metric_list_glob = [global_efficiency, average_local_efficiency,
                        degree_assortativity_coefficient, average_clustering,
                        average_shortest_path_length,
                        degree_pearson_correlation_coefficient,
                        graph_number_of_cliques, transitivity, sigma]
    metric_list_comm = ['louvain_modularity']

    # with open("%s%s" % (str(Path(__file__).parent), '/global_graph_measures.yaml'), 'r') as stream:
    #     try:
    #         metric_dict_global = yaml.load(stream)
    #         metric_list_global = metric_dict_global['metric_list_global']
    #         print("%s%s%s" % ('\n\nCalculating global measures:\n', metric_list_global, '\n\n'))
    #     except FileNotFoundError:
    #         print('Failed to parse global_graph_measures.yaml')

    with open("%s%s" % (str(Path(__file__).parent),
                        '/nodal_graph_measures.yaml'), 'r') as stream:
        try:
            metric_dict_nodal = yaml.load(stream, Loader=yaml.FullLoader)
            metric_list_nodal = metric_dict_nodal['metric_list_nodal']
            print("%s%s%s" % ('\n\nCalculating nodal measures:\n',
                              metric_list_nodal, '\n\n'))
        except FileNotFoundError:
            print('Failed to parse nodal_graph_measures.yaml')

    # Note the use of bare excepts in the blocks that follow. Typically, this
    # is considered bad practice in python. Here, we are exploiting it
    # intentionally to facilitate uninterrupted, automated graph analysis even
    # when algorithms are undefined. In those instances, solutions are
    # assigned NaN's.

    # Iteratively run functions from the above metric list that generate a
    # single scalar output
    num_mets = len(metric_list_glob)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list_glob:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        net_met = met_name
        try:
            try:
                net_met_val = raw_mets(G, i, custom_weight)
            except:
                print("%s%s%s" % ('WARNING: ', net_met, ' failed for graph G.'))
                net_met_val = np.nan
        except:
            print("%s%s%s" % ('WARNING: ', str(i), ' is undefined for graph G'))
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    # Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    # Run miscellaneous functions that generate multiple outputs

    # Calculate modularity using the Louvain algorithm
    if 'louvain_modularity' in metric_list_comm:
        try:
            ci = community.best_partition(G)
            modularity = community.community_louvain.modularity(ci, G)
            metric_list_names.append('modularity')
            net_met_val_list_final.append(modularity)
        except:
            print('Louvain modularity calculation is undefined for graph G')
            pass

    # Participation coefficient by louvain community
    if 'participation_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError('Participation coefficient cannot be calculated '
                               'for graph G in the absence of a community '
                               'affiliation vector')
            if len(in_mat[in_mat < 0.0]) > 0:
                pc_vector = participation_coef_sign(in_mat, ci)
            else:
                pc_vector = participation_coef(in_mat, ci)
            print('\nExtracting Participation Coefficient vector for all network nodes...')
            pc_vals = list(pc_vector)
            pc_edges = list(range(len(pc_vector)))
            num_edges = len(pc_edges)
            pc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            for j in range(num_edges):
                pc_arr[j, 0] = "%s%s" % (str(pc_edges[j]), '_partic_coef')
                try:
                    pc_arr[j, 1] = pc_vals[j]
                except:
                    print("%s%s%s" % ('Participation coefficient is undefined for node ',
                                      str(j), ' of graph G'))
                    pc_arr[j, 1] = np.nan
            # Add mean
            pc_arr[num_edges, 0] = 'average_participation_coefficient'
            nonzero_arr_partic_coef = np.delete(pc_arr[:, 1], [0])
            pc_arr[num_edges, 1] = np.mean(nonzero_arr_partic_coef)
            print("%s%s" % ('Mean Participation Coefficient across edges: ',
                            str(pc_arr[num_edges, 1])))
            for i in pc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(pc_arr[:, 1])
        except:
            print('Participation coefficient cannot be calculated for graph G')
            pass

    # Diversity coefficient by louvain community
    if 'diversity_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError('Diversity coefficient cannot be calculated for '
                               'graph G in the absence of a community '
                               'affiliation vector')
            [dc_vector, _] = diversity_coef_sign(in_mat, ci)
            print('\nExtracting Diversity Coefficient vector for all network nodes...')
            dc_vals = list(dc_vector)
            dc_edges = list(range(len(dc_vector)))
            num_edges = len(dc_edges)
            dc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            for j in range(num_edges):
                dc_arr[j, 0] = "%s%s" % (str(dc_edges[j]), '_diversity_coef')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print("%s%s%s" % ('Diversity coefficient is undefined for node ',
                                      str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
            # Add mean
            dc_arr[num_edges, 0] = 'average_diversity_coefficient'
            nonzero_arr_diversity_coef = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_edges, 1] = np.mean(nonzero_arr_diversity_coef)
            print("%s%s" % ('Mean Diversity Coefficient across edges: ',
                            str(dc_arr[num_edges, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
        except:
            print('Diversity coefficient cannot be calculated for graph G')
            pass

    # Local efficiency
    if 'local_efficiency' in metric_list_nodal:
        try:
            le_vector = local_efficiency(G)
            print('\nExtracting Local Efficiency vector for all network nodes...')
            le_vals = list(le_vector.values())
            le_nodes = list(le_vector.keys())
            num_nodes = len(le_nodes)
            le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                le_arr[j, 0] = "%s%s" % (str(le_nodes[j]), '_local_efficiency')
                try:
                    le_arr[j, 1] = le_vals[j]
                except:
                    print("%s%s%s" % ('Local efficiency is undefined for node ',
                                      str(j), ' of graph G'))
                    le_arr[j, 1] = np.nan
            le_arr[num_nodes, 0] = 'average_local_efficiency_nodewise'
            nonzero_arr_le = np.delete(le_arr[:, 1], [0])
            le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
            print("%s%s" % ('Mean Local Efficiency across nodes: ',
                            str(le_arr[num_nodes, 1])))
            for i in le_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1])
        except:
            print('Local efficiency cannot be calculated for graph G')
            pass

    # Local clustering
    if 'local_clustering' in metric_list_nodal:
        try:
            cl_vector = clustering(G)
            print('\nExtracting Local Clustering vector for all network nodes...')
            cl_vals = list(cl_vector.values())
            cl_nodes = list(cl_vector.keys())
            num_nodes = len(cl_nodes)
            cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                cl_arr[j, 0] = "%s%s" % (str(cl_nodes[j]), '_local_clustering')
                try:
                    cl_arr[j, 1] = cl_vals[j]
                except:
                    print("%s%s%s" % ('Local clustering is undefined for node ',
                                      str(j), ' of graph G'))
                    cl_arr[j, 1] = np.nan
            cl_arr[num_nodes, 0] = 'average_local_clustering_nodewise'
            nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
            cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
            print("%s%s" % ('Mean Local Clustering across nodes: ',
                            str(cl_arr[num_nodes, 1])))
            for i in cl_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1])
        except:
            print('Local clustering cannot be calculated for graph G')
            pass

    # Degree centrality
    if 'degree_centrality' in metric_list_nodal:
        try:
            dc_vector = degree_centrality(G)
            print('\nExtracting Degree Centrality vector for all network nodes...')
            dc_vals = list(dc_vector.values())
            dc_nodes = list(dc_vector.keys())
            num_nodes = len(dc_nodes)
            dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                dc_arr[j, 0] = "%s%s" % (str(dc_nodes[j]), '_degree_centrality')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print("%s%s%s" % ('Degree centrality is undefined for node ',
                                      str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
            dc_arr[num_nodes, 0] = 'average_degree_cent'
            nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
            print("%s%s" % ('Mean Degree Centrality across nodes: ',
                            str(dc_arr[num_nodes, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
        except:
            print('Degree centrality cannot be calculated for graph G')
            pass

    # Betweenness centrality, computed on the length matrix because weighted
    # shortest paths interpret weights as distances
    if 'betweenness_centrality' in metric_list_nodal:
        try:
            bc_vector = betweenness_centrality(G_len, normalized=True)
            print('\nExtracting Betweenness Centrality vector for all network nodes...')
            bc_vals = list(bc_vector.values())
            bc_nodes = list(bc_vector.keys())
            num_nodes = len(bc_nodes)
            bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                bc_arr[j, 0] = "%s%s" % (str(bc_nodes[j]),
                                         '_betweenness_centrality')
                try:
                    bc_arr[j, 1] = bc_vals[j]
                except:
                    print("%s%s%s" % ('Betweenness centrality is undefined for node ',
                                      str(j), ' of graph G'))
                    bc_arr[j, 1] = np.nan
            bc_arr[num_nodes, 0] = 'average_betweenness_centrality'
            nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
            bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
            print("%s%s" % ('Mean Betweenness Centrality across nodes: ',
                            str(bc_arr[num_nodes, 1])))
            for i in bc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
        except:
            print('Betweenness centrality cannot be calculated for graph G')
            pass

    # Eigenvector centrality
    if 'eigenvector_centrality' in metric_list_nodal:
        try:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
            print('\nExtracting Eigenvector Centrality vector for all network nodes...')
            ec_vals = list(ec_vector.values())
            ec_nodes = list(ec_vector.keys())
            num_nodes = len(ec_nodes)
            ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                ec_arr[j, 0] = "%s%s" % (str(ec_nodes[j]),
                                         '_eigenvector_centrality')
                try:
                    ec_arr[j, 1] = ec_vals[j]
                except:
                    print("%s%s%s" % ('Eigenvector centrality is undefined for node ',
                                      str(j), ' of graph G'))
                    ec_arr[j, 1] = np.nan
            ec_arr[num_nodes, 0] = 'average_eigenvector_centrality'
            nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
            ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
            print("%s%s" % ('Mean Eigenvector Centrality across nodes: ',
                            str(ec_arr[num_nodes, 1])))
            for i in ec_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
        except:
            print('Eigenvector centrality cannot be calculated for graph G')
            pass

    # Communicability centrality
    if 'communicability_centrality' in metric_list_nodal:
        try:
            cc_vector = communicability_betweenness_centrality(G, normalized=True)
            print('\nExtracting Communicability Centrality vector for all network nodes...')
            cc_vals = list(cc_vector.values())
            cc_nodes = list(cc_vector.keys())
            num_nodes = len(cc_nodes)
            cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                cc_arr[j, 0] = "%s%s" % (str(cc_nodes[j]),
                                         '_communicability_centrality')
                try:
                    cc_arr[j, 1] = cc_vals[j]
                except:
                    print("%s%s%s" % ('Communicability centrality is undefined for node ',
                                      str(j), ' of graph G'))
                    cc_arr[j, 1] = np.nan
            cc_arr[num_nodes, 0] = 'average_communicability_centrality'
            nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
            cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
            print("%s%s" % ('Mean Communicability Centrality across nodes: ',
                            str(cc_arr[num_nodes, 1])))
            for i in cc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
        except:
            print('Communicability centrality cannot be calculated for graph G')
            pass

    # Rich club coefficient
    if 'rich_club_coefficient' in metric_list_nodal:
        try:
            rc_vector = rich_club_coefficient(G, normalized=True)
            print('\nExtracting Rich Club Coefficient vector for all network nodes...')
            rc_vals = list(rc_vector.values())
            rc_edges = list(rc_vector.keys())
            num_edges = len(rc_edges)
            rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            for j in range(num_edges):
                rc_arr[j, 0] = "%s%s" % (str(rc_edges[j]), '_rich_club')
                try:
                    rc_arr[j, 1] = rc_vals[j]
                except:
                    print("%s%s%s" % ('Rich club coefficient is undefined for node ',
                                      str(j), ' of graph G'))
                    rc_arr[j, 1] = np.nan
            # Add mean
            rc_arr[num_edges, 0] = 'average_rich_club_coefficient'
            nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
            rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
            print("%s%s" % ('Mean Rich Club Coefficient across edges: ',
                            str(rc_arr[num_edges, 1])))
            for i in rc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
        except:
            print('Rich club coefficient cannot be calculated for graph G')
            pass

    # Save metric names as pickle
    if roi:
        met_list_pickle_path = "%s%s%s%s" % (
            os.path.dirname(os.path.abspath(est_path)), '/net_met_list',
            "%s" % ("%s%s%s" % ('_', network, '_') if network else "_"),
            os.path.basename(roi).split('.')[0])
    else:
        if network:
            met_list_pickle_path = "%s%s%s" % (
                os.path.dirname(os.path.abspath(est_path)),
                '/net_met_list_', network)
        else:
            met_list_pickle_path = "%s%s" % (
                os.path.dirname(os.path.abspath(est_path)), '/net_met_list')
    pickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'), protocol=2)

    # And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, roi,
                                     dir_path, node_size)
    np.savetxt(out_path, net_met_val_list_final, delimiter='\t')

    if frag is True:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_frag_neat.csv')
    else:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_neat.csv')
    df = pd.DataFrame.from_dict(dict(zip(metric_list_names,
                                         net_met_val_list_final)),
                                orient='index').transpose()
    df.to_csv(out_path_neat, index=False)

    return out_path
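The contents of nodal_graph_measures.yaml are not shown in this excerpt; judging from the membership tests above, a minimal file presumably looks like the string below (an assumption, not the shipped config):

import yaml

example = """
metric_list_nodal:
  - participation_coefficient
  - diversity_coefficient
  - local_efficiency
  - local_clustering
  - degree_centrality
  - betweenness_centrality
  - eigenvector_centrality
  - communicability_centrality
  - rich_club_coefficient
"""
print(yaml.load(example, Loader=yaml.FullLoader)['metric_list_nodal'])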
def betweenness_centrality(G, k=None, normalized=True, weight=None,
                           endpoints=False, seed=None):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the fraction of
    all-pairs shortest paths that pass through $v$

    .. math::

       c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
    those paths passing through some node $v$ other than $s, t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t|v) = 0$ [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    k : int, optional (default=None)
      If k is not None, use k node samples ("pivots") to estimate
      betweenness.

    normalized : bool, optional
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
      for undirected graphs, and `1/((n-1)(n-2))` for directed graphs
      where `n` is the number of nodes in G.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.
      Weights are used to calculate weighted shortest paths,
      so they are interpreted as distances.

    endpoints : bool, optional
      If True include the endpoints in the shortest path counts.

    seed : integer, random_state, or None (default)
      Indicator of random number generation state; only relevant when
      k is not None.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_. See [4]_ for the original
    first published version and [2]_ for details on algorithms for
    variations and related metrics.

    For approximate betweenness calculations set k=#samples to use
    k nodes ("pivots") to estimate the betweenness values. For an estimate
    of the number of pivots needed see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    The total number of paths between source and target is counted
    differently for directed and undirected graphs. Directed paths
    are easy to count. Undirected paths are tricky: should a path
    from "u" to "v" count as 1 undirected path or as 2 directed paths?

    For betweenness_centrality we report the number of undirected
    paths when G is undirected.

    For betweenness_centrality_subset the reporting is different.
    If the source and target subsets are the same, then we want
    to count undirected paths. But if the source and target subsets
    differ -- for example, if sources is {0} and targets is {1},
    then we are only counting the paths in one direction. They are
    undirected paths but we are counting them in a directed way.
    To count them as undirected paths, each should count as half a path.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35-41, 1977
       https://doi.org/10.2307/3033543
    """

    @context_to_dict
    @project_to_simple
    def _betweenness_centrality(G, k=None, normalized=True, weight=None,
                                endpoints=False, seed=None):
        algorithm = "betweenness_centrality"
        if weight is not None:
            algorithm = "betweenness_centrality_generic"
        return AppAssets(algo=algorithm, context="vertex_data")(
            G, normalized=normalized, endpoints=endpoints)

    # Fall back to networkx for non-native graphs or seeded runs
    if not isinstance(G, nx.Graph) or seed is not None:
        return nxa.betweenness_centrality(G, k, normalized, weight,
                                          endpoints, seed)
    return _betweenness_centrality(G, k=k, normalized=normalized,
                                   weight=weight, endpoints=endpoints,
                                   seed=seed)
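Usage sketch for the dispatching wrapper above (an assumption about call routing: per the final guard, any call that supplies a seed is sent to networkx's own implementation, regardless of graph type):

import networkx

G = networkx.path_graph(5)
# seed is not None, so the call is routed to networkx
print(betweenness_centrality(G, k=None, seed=42))
# -> {0: 0.0, 1: 0.5, 2: 0.6666666666666666, 3: 0.5, 4: 0.0}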
with open("../data/wallet.json", "r") as file: data = load(file) for package, deps in data.items(): dep_graph.add_node(package) for dep in deps: dep_graph.add_edge(package, dep) dep_graph.remove_node("blockchain/unused-My-Wallet") return dep_graph if __name__ == "__main__": graph = build_complete_graph() betw = betweenness_centrality(graph) betw = sorted(betw.items(), key=lambda x: x[1], reverse=True) with open("../data/wallet_url_improved.json", "r") as file: urls = load(file) results = [] for package, betw_score in tqdm(betw[100:200]): cs = 0 if package not in urls else fetch_criticality(package) results.append({"name": package, "betw": betw_score, "cs": cs}) with open("../data/wallet_criticality_100-200.json", "w") as file: dump(results, file, indent=4) # nx.write_gml(dep_graph, "../data/dep_graph.gml")