Example #1
def average_betweenness_centrality(G, connected_only=True):
    # Betweenness centrality of a node v is the sum of the fractions of
    # all-pairs shortest paths that pass through v.
    # Only densely connected hidden nodes can have positive betweenness centrality.
    HBC = betweenness_centrality(G)
    if connected_only:
        cm = np.array(nx.adjacency_matrix(G).todense())
        num_hidden = number_of_densely_connected_nodes(cm)
    else:
        num_hidden = len(agent.hidden_ixs)

    avHBC = sum(HBC.values()) / num_hidden
    return avHBC
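A minimal, self-contained sketch of the same kind of average, without the project-specific helpers (number_of_densely_connected_nodes and agent are assumed to come from the surrounding codebase):

import networkx as nx
from networkx.algorithms import betweenness_centrality

G_demo = nx.path_graph(5)  # 0-1-2-3-4; the middle node lies on the most shortest paths
bc = betweenness_centrality(G_demo)
print(sum(bc.values()) / len(bc))  # plain mean over all nodes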
Example #2
def compute_stats(df_fname, dist_mat=None):
    df = pd.read_hdf(df_fname)
    # Stats organized as follows:
    #   name : lambda/function to be called on (g)
    dists = np.loadtxt(dist_mat)

    stats = {
        "edgecount": nxf.number_of_edges,
        "globaleffic": nxa.global_efficiency,
        "degree": lambda g: dict(nxf.degree(g)).values(),
        "modularity": lambda g: nntm.consensus_modularity(
            nx.adjacency_matrix(g).toarray(), seed=42)[1].mean(),
        "assort": nxa.assortativity.degree_assortativity_coefficient,
        "avplength": lambda g: np.mean(connection_length(g, dists=dists)),
        "weight": lambda g: list(nxf.get_edge_attributes(g, 'weight').values()),
        "ccoeff": lambda g: list(nxa.clustering(g, weight=None).values()),
        "betweenness": lambda g: list(
            nxa.betweenness_centrality(g, weight='weight').values()),
        "plength": lambda g: connection_length(g, dists=dists)
    }

    # Create one dict key (and eventual DataFrame column) per stat:
    stat_results = {'index': []}
    stat_results.update({stat_name: [] for stat_name in stats.keys()})

    for idx, row in df.iterrows():
        tmpg = nx.Graph(row.graph)
        stat_results['index'] += [row['index']]
        for stat_name, stat_fn in stats.items():
            tmps = stat_fn(tmpg)
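            # Vector-valued stats support len(); scalar stats raise TypeError below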
            try:
                len(tmps)
                stat_results[stat_name] += [np.array(list(tmps))]
            except TypeError:
                stat_results[stat_name] += [tmps]

    stat_df = pd.DataFrame.from_dict(stat_results)
    stat_df.to_hdf(df_fname.replace('.h5', '_stats.h5'), "stats", mode="w")
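A small usage sketch for the function above; the file names, and an input HDF5 store with 'index' and 'graph' columns, are assumptions:

compute_stats('graphs.h5', dist_mat='distances.txt')
stat_df = pd.read_hdf('graphs_stats.h5', 'stats')
print(stat_df[['index', 'edgecount', 'globaleffic']].head())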
Example #3
    def calculate_betweenness_centrality(self):
        G = self._graph
        self._betweenness_centralities = nxa.betweenness_centrality(G)
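A sketch of the surrounding class this method presumably belongs to (everything except the method body is an assumption):

import networkx as nx
from networkx import algorithms as nxa

class GraphAnalyzer:
    def __init__(self, graph):
        self._graph = graph
        self._betweenness_centralities = None

    def calculate_betweenness_centrality(self):
        G = self._graph
        self._betweenness_centralities = nxa.betweenness_centrality(G)

analyzer = GraphAnalyzer(nx.karate_club_graph())
analyzer.calculate_betweenness_centrality()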
Example #4
from networkx.algorithms import betweenness_centrality

from utils import build_complete_graph


dep_graph = build_complete_graph()

betw = betweenness_centrality(dep_graph)
betw = sorted(betw.items(), key=lambda x: x[1], reverse=True)

num_package = 13
for package, betw_score in betw[:num_package]:
    print(f"{package}, {betw_score:.6f}")
Example #5
    def getBetweenessCentrality(self, G):
        return nalgos.betweenness_centrality(G)
Example #6
        V = snap.TIntFltH()
        E = snap.TIntPrFltH()
        snap.GetBetweennessCentr(G_snap, V, E, 1.0)
        values = []
        for i in V:
            values.append(V[i])
        n = len(values)
        betweenness_coef = sum(values) / ((n - 1) * (n - 2))
        report += ["Average betweenness coefficient: %f" % float(betweenness_coef)]
        print report[-1]
    else:
        # Clustering coefficient
        average_clustering_coef = nx.average_clustering(G)
        report += ["Average clustering coefficient: %f" % float(average_clustering_coef)]
        print(report[-1])
        values = list(algos.betweenness_centrality(G).values())
        n = len(values)
        betweenness_coef = sum(values) / ((n - 1) * (n - 2))
        report += ["Average betweenness coefficient: %f" % float(betweenness_coef)]
        print(report[-1])
    # Saving eigs for faster computations
    if not (os.path.exists("outputs/q4/intermediates/vecs.npy") and
                os.path.exists("outputs/q4/intermediates/vals.npy")):
        laplacian_matrix = nx.linalg.laplacian_matrix(G).todense()
        vals, vecs = np.linalg.eigh(laplacian_matrix)
        np.save('outputs/q4/intermediates/vecs.npy', vecs)
        np.save('outputs/q4/intermediates/vals.npy', vals)
    else:
        vecs = np.load('outputs/q4/intermediates/vecs.npy')
        vals = np.load('outputs/q4/intermediates/vals.npy')
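For comparison, a sketch of the same average in pure NetworkX: with normalized=True, betweenness_centrality already divides each node's score by (n - 1) * (n - 2) on directed graphs (and by half that on undirected ones), so the average reduces to a plain mean:

import networkx as nx

G = nx.erdos_renyi_graph(50, 0.1, seed=1, directed=True)
bc = nx.betweenness_centrality(G, normalized=True)
print("Average betweenness coefficient: %f" % (sum(bc.values()) / len(bc)))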
Example #7
'''
a random graph can be generated using the Erdos-Renyi algorithm, for example
'''
import networkx as nx
import pandas as pd
import seaborn as sns
from networkx.algorithms import (degree_centrality, eigenvector_centrality,
                                 betweenness_centrality)

G = nx.erdos_renyi_graph(100, 0.1)

# draw network
nx.draw(G)

# degree
degree = degree_centrality(G)

# eigenvector centrality
ec = eigenvector_centrality(G)

# betweenness centrality
bc = betweenness_centrality(G)

# visualize results
# --+ df
df = pd.DataFrame({'degree': degree, 'eigenvector_centrality': ec,
                   'betweenness_centrality': bc})
# --+ correlation matrix
df.corr()
# --+ scatter plot matrix
sns.pairplot(df)


# %% node centrality - case B, small world network

# generator
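# A sketch of how case B presumably continues, mirroring case A on a
# Watts-Strogatz small-world graph (the parameters are assumptions):
G_sw = nx.watts_strogatz_graph(n=100, k=4, p=0.1)

degree_sw = degree_centrality(G_sw)
ec_sw = eigenvector_centrality(G_sw)
bc_sw = betweenness_centrality(G_sw)

df_sw = pd.DataFrame({'degree': degree_sw, 'eigenvector_centrality': ec_sw,
                      'betweenness_centrality': bc_sw})
df_sw.corr()
sns.pairplot(df_sw)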
Example #8
def extractnetstats(ID,
                    network,
                    thr,
                    conn_model,
                    est_path,
                    mask,
                    out_file=None):
    from pynets import thresholding, utils

    pruning = True

    ##Load and threshold matrix
    in_mat = np.array(np.genfromtxt(est_path))
    in_mat = thresholding.autofix(in_mat)

    ##Normalize connectivity matrix (weights between 0-1)
    in_mat = thresholding.normalize(in_mat)

    ##Get hyperbolic tangent of matrix if non-sparse (i.e. Fisher r-to-z transform)
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)
        in_mat[np.isnan(in_mat)] = 0
        in_mat[np.isinf(in_mat)] = 1
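        # (arctanh is the Fisher z-transform: arctanh(r) = 0.5 * log((1 + r) / (1 - r));
        # r values of exactly +/-1 map to infinity, hence the NaN/inf cleanup above)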

    ##Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    ##Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    ##Prune irrelevant nodes (i.e. nodes that are fully disconnected from the graph and/or whose betweenness centrality is > 3 standard deviations below the mean)
    if pruning:
        [G_pruned, _, _] = most_important(G_pre)
    else:
        G_pruned = G_pre

    ##Make directed if sparse
    if conn_model not in ('corr', 'cov', 'tangent'):
        G_dir = nx.DiGraph(G_pruned)
        G = G_pruned
    else:
        G = G_pruned

    ##Get corresponding matrix
    in_mat = nx.to_numpy_array(G)

    ##Print graph summary
    print('\n\nThreshold: ' + str(thr))
    print('Source File: ' + str(est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    try:
        G_dir
        print('Analyzing DIRECTED graph when applicable...')
    except NameError:
        print('Graph is UNDIRECTED')

    if conn_model in ('corr', 'cov', 'tangent'):
        if nx.is_connected(G):
            print('Graph is CONNECTED')
        else:
            num_conn_comp = nx.number_connected_components(G)
            print('Graph is DISCONNECTED with ' + str(num_conn_comp) +
                  ' connected component(s)')
    print('\n')

    ##Create Length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    ##Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)
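    ##(NetworkX shortest-path routines treat edge 'weight' as a distance, so strong
    ##connections must first be converted to short lengths; the betweenness call below uses G_len)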

    ##Save G as gephi file
    if mask:
        if network:
            nx.write_graphml(
                G, dir_path + '/' + ID + '_' + network + '_' +
                str(os.path.basename(mask).split('.')[0]) + '.graphml')
        else:
            nx.write_graphml(
                G, dir_path + '/' + ID + '_' +
                str(os.path.basename(mask).split('.')[0]) + '.graphml')
    else:
        if network:
            nx.write_graphml(G,
                             dir_path + '/' + ID + '_' + network + '.graphml')
        else:
            nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques, transitivity,
                                     betweenness_centrality,
                                     eigenvector_centrality,
                                     communicability_betweenness_centrality,
                                     clustering, degree_centrality,
                                     rich_club_coefficient)
    from pynets.netstats import (average_local_efficiency, global_efficiency,
                                 local_efficiency, modularity_louvain_dir,
                                 core_periphery_dir, smallworldness)
    ##For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the function (with a G-only input) to the netstats module.
    metric_list = [
        global_efficiency, average_local_efficiency, smallworldness,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity
    ]

    ##Custom Weight Parameter
    #custom_weight = 0.25
    custom_weight = None

    ##Iteratively run functions from above metric list that generate single scalar output
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        net_met = met_name
        try:
            ##(i is a function object, so compare by name rather than with 'is')
            if met_name == 'average_shortest_path_length':
                try:
                    try:
                        net_met_val = float(i(G_dir))
                        print('Calculating from directed graph...')
                    except:
                        net_met_val = float(i(G))
                except:
                    ##case where G is not fully connected
                    net_met_val = float(
                        average_shortest_path_length_for_all(G))
            elif custom_weight is not None and met_name in (
                    'degree_assortativity_coefficient', 'global_efficiency',
                    'average_local_efficiency', 'average_clustering'):
                try:
                    net_met_val = float(i(G_dir, weight=custom_weight))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G, weight=custom_weight))
            else:
                try:
                    net_met_val = float(i(G_dir))
                    print('Calculating from directed graph...')
                except:
                    net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    ##Run miscellaneous functions that generate multiple outputs
    ##Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity_louvain_dir(in_mat)

    ##Calculate core-periphery subdivision
    [Coreness_vec, Coreness_q] = core_periphery_dir(in_mat)
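    ##(By bctpy convention, community_aff and Coreness_vec are nodewise vectors,
    ##while modularity and Coreness_q are the corresponding scalar quality scores)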

    ##Local Efficiency
    try:
        try:
            le_vector = local_efficiency(G_dir)
        except:
            le_vector = local_efficiency(G)
        print('\nExtracting Local Efficiency vector for all network nodes...')
        le_vals = list(le_vector.values())
        le_nodes = list(le_vector.keys())
        num_nodes = len(le_nodes)
        le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            le_arr[j, 0] = str(le_nodes[j]) + '_local_efficiency'
            #print('\n' + str(le_nodes[j]) + '_local_efficiency')
            try:
                le_arr[j, 1] = le_vals[j]
            except:
                le_arr[j, 1] = np.nan
            #print(str(le_vals[j]))
            j = j + 1
        le_arr[num_nodes, 0] = 'MEAN_local_efficiency'
        nonzero_arr_le = np.delete(le_arr[:, 1], [0])
        le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
        print('Mean Local Efficiency across nodes: ' +
              str(le_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Local Clustering
    try:
        cl_vector = clustering(G)
        print('\nExtracting Local Clustering vector for all network nodes...')
        cl_vals = list(cl_vector.values())
        cl_nodes = list(cl_vector.keys())
        num_nodes = len(cl_nodes)
        cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cl_arr[j, 0] = str(cl_nodes[j]) + '_local_clustering'
            #print('\n' + str(cl_nodes[j]) + '_local_clustering')
            try:
                cl_arr[j, 1] = cl_vals[j]
            except:
                cl_arr[j, 1] = np.nan
            #print(str(cl_vals[j]))
            j = j + 1
        cl_arr[num_nodes, 0] = 'MEAN_local_clustering'
        nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
        cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
        print('Mean Local Clustering across nodes: ' +
              str(cl_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Degree centrality
    try:
        try:
            dc_vector = degree_centrality(G_dir)
        except:
            dc_vector = degree_centrality(G)
        print('\nExtracting Degree Centrality vector for all network nodes...')
        dc_vals = list(dc_vector.values())
        dc_nodes = list(dc_vector.keys())
        num_nodes = len(dc_nodes)
        dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            dc_arr[j, 0] = str(dc_nodes[j]) + '_degree_centrality'
            #print('\n' + str(dc_nodes[j]) + '_degree_centrality')
            try:
                dc_arr[j, 1] = dc_vals[j]
            except:
                dc_arr[j, 1] = np.nan
            #print(str(dc_vals[j]))
            j = j + 1
        dc_arr[num_nodes, 0] = 'MEAN_degree_centrality'
        nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
        dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
        print('Mean Degree Centrality across nodes: ' +
              str(dc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Betweenness Centrality
    try:
        bc_vector = betweenness_centrality(G_len, normalized=True)
        print(
            '\nExtracting Betweenness Centrality vector for all network nodes...'
        )
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            bc_arr[j, 0] = str(bc_nodes[j]) + '_betweenness_centrality'
            #print('\n' + str(bc_nodes[j]) + '_betw_cent')
            try:
                bc_arr[j, 1] = bc_vals[j]
            except:
                bc_arr[j, 1] = np.nan
            #print(str(bc_vals[j]))
            j = j + 1
        bc_arr[num_nodes, 0] = 'MEAN_betw_cent'
        nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
        bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
        print('Mean Betweenness Centrality across nodes: ' +
              str(bc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Eigenvector Centrality
    try:
        try:
            ec_vector = eigenvector_centrality(G_dir, max_iter=1000)
        except:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
        print(
            '\nExtracting Eigenvector Centrality vector for all network nodes...'
        )
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            ec_arr[j, 0] = str(ec_nodes[j]) + '_eigenvector_centrality'
            #print('\n' + str(ec_nodes[j]) + '_eig_cent')
            try:
                ec_arr[j, 1] = ec_vals[j]
            except:
                ec_arr[j, 1] = np.nan
            #print(str(ec_vals[j]))
            j = j + 1
        ec_arr[num_nodes, 0] = 'MEAN_eig_cent'
        nonzero_arr_eig_cent = np.delete(ec_arr[:, 1], [0])
        ec_arr[num_nodes, 1] = np.mean(nonzero_arr_eig_cent)
        print('Mean Eigenvector Centrality across nodes: ' +
              str(ec_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Communicability Centrality
    try:
        cc_vector = communicability_betweenness_centrality(G, normalized=True)
        print(
            '\nExtracting Communicability Centrality vector for all network nodes...'
        )
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            cc_arr[j, 0] = str(cc_nodes[j]) + '_communicability_centrality'
            #print('\n' + str(cc_nodes[j]) + '_comm_cent')
            try:
                cc_arr[j, 1] = cc_vals[j]
            except:
                cc_arr[j, 1] = np.nan
            #print(str(cc_vals[j]))
            j = j + 1
        cc_arr[num_nodes, 0] = 'MEAN_comm_cent'
        nonzero_arr_comm_cent = np.delete(cc_arr[:, 1], [0])
        cc_arr[num_nodes, 1] = np.mean(nonzero_arr_comm_cent)
        print('Mean Communicability Centrality across nodes: ' +
              str(cc_arr[num_nodes, 1]))
        print('\n')
    except:
        pass

    ##Rich club coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print(
            '\nExtracting Rich Club Coefficient vector for all network nodes...'
        )
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        j = 0
        for i in range(num_edges):
            rc_arr[j, 0] = str(rc_edges[j]) + '_rich_club'
            #print('\n' + str(rc_edges[j]) + '_rich_club')
            try:
                rc_arr[j, 1] = rc_vals[j]
            except:
                rc_arr[j, 1] = np.nan
            #print(str(rc_vals[j]))
            j = j + 1
        ##Add mean
        rc_arr[num_edges, 0] = 'MEAN_rich_club'
        nonzero_arr_rich_club = np.delete(rc_arr[:, 1], [0])
        rc_arr[num_edges, 1] = np.mean(nonzero_arr_rich_club)
        print('Mean Rich Club Coefficient across edges: ' +
              str(rc_arr[num_edges, 1]))
        print('\n')
    except:
        pass

    ##Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    ##Add modularity measure
    try:
        metric_list_names.append('Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    ##Add Core/Periphery measure
    try:
        metric_list_names.append('Coreness')
        net_met_val_list_final.append(Coreness_q)
    except:
        pass

    ##Add local efficiency measures
    try:
        for i in le_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(le_arr[:, 1])
    except:
        pass

    ##Add local clustering measures
    try:
        for i in cl_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cl_arr[:, 1])
    except:
        pass

    ##Add centrality measures
    try:
        for i in dc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(dc_arr[:, 1])
    except:
        pass
    try:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:, 1])
    except:
        pass
    try:
        for i in ec_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
    except:
        pass
    try:
        for i in cc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
    except:
        pass

    ##Add rich club measure
    try:
        for i in rc_arr[:, 0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
    except:
        pass

    ##Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle

    if mask is not None:
        if network is not None:
            met_list_pickle_path = os.path.dirname(os.path.abspath(
                est_path)) + '/net_metric_list_' + network + '_' + str(
                    os.path.basename(mask).split('.')[0])
        else:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list_' + str(
                    os.path.basename(mask).split('.')[0])
    else:
        if network is not None:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list_' + network
        else:
            met_list_pickle_path = os.path.dirname(
                os.path.abspath(est_path)) + '/net_metric_list'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    ##And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, mask,
                                     dir_path)
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
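A hedged sketch of how this interface might be invoked; every argument value below is hypothetical:

out_csv = extractnetstats(ID='sub-01', network='Default', thr=0.2,
                          conn_model='corr', est_path='/tmp/sub-01_est.txt',
                          mask=None)
print(out_csv)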
Example #9
def extractnetstats(est_path, ID, NETWORK, thr, sps_model, out_file=None):
    in_mat = np.array(np.genfromtxt(est_path))

    def threshold_proportional(in_mat, thr):
        ##number of nodes
        n = len(in_mat)
        ##clear diagonal
        np.fill_diagonal(in_mat, 0)
        ##if symmetric matrix
        if np.allclose(in_mat, in_mat.T):
            ##ensure symmetry is preserved
            in_mat[np.tril_indices(n)] = 0
            ##halve number of removed links
            ud = 2
        else:
            ud = 1
        ##find all links
        ind = np.where(in_mat)
        ##sort indices by magnitude
        I = np.argsort(in_mat[ind])[::-1]
        ##number of links to be preserved
        en = int(round((n * n - n) * float(thr) / ud))
        ##apply threshold
        in_mat[(ind[0][I][en:], ind[1][I][en:])] = 0
        ##if symmetric matrix
        if ud == 2:
            ##reconstruct symmetry
            in_mat[:, :] = in_mat + in_mat.T
        return in_mat

    in_mat = threshold_proportional(in_mat, thr)
    ##Get hyperbolic tangent of graph if non-sparse (i.e. Fisher r-to-z transform), and divide by the variance of the matrix
    if not sps_model:
        in_mat = np.arctanh(in_mat) / np.var(in_mat)
    dir_path = os.path.dirname(os.path.realpath(est_path))
    G = nx.from_numpy_matrix(in_mat)

    ###############################################################
    ############Calculate graph metrics from graph G###############
    ###############################################################
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques,
                                     rich_club_coefficient, transitivity,
                                     betweenness_centrality)
    #from networkx.algorithms.matching import min_maximal_matching

    from itertools import permutations
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle

    def efficiency(G, u, v):
        return 1.0 / nx.shortest_path_length(G, u, v)

    def global_efficiency(G):
        n = len(G)
        denom = n * (n - 1)
        return float(sum(efficiency(G, u, v)
                         for u, v in permutations(G, 2))) / denom

    def local_efficiency(G):
        return float(sum(global_efficiency(nx.ego_graph(G, v))
                         for v in G)) / len(G)
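    ##(These implement the Latora-Marchiori measures: global efficiency is the mean
    ##inverse shortest-path length, E_glob = (1 / (n * (n - 1))) * sum(1 / d(u, v))
    ##over all ordered pairs, and local efficiency averages E_glob over each node's ego graph)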

    metric_list = [
        global_efficiency, local_efficiency, degree_assortativity_coefficient,
        average_clustering, average_shortest_path_length,
        degree_pearson_correlation_coefficient, graph_number_of_cliques,
        rich_club_coefficient, transitivity
    ]

    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list:
        net_met = '%s' % i.__name__
        try:
            net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        j = j + 1

    if NETWORK is not None:
        bc_vector = betweenness_centrality(G)
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes, 2], dtype='object')
        j = 0
        for i in range(num_nodes):
            bc_arr[j, 0] = NETWORK + '_' + str(bc_nodes[j]) + '_bet_cent'
            bc_arr[j, 1] = bc_vals[j]
            print(NETWORK + '_' + str(bc_nodes[j]))
            print(str(bc_vals[j]))
            j = j + 1
        net_met_val_list = list(net_met_arr[:, 1]) + list(bc_arr[:, 1])
    else:
        net_met_val_list = list(net_met_arr[:, 1])

    metric_list_names = []
    for i in metric_list:
        metric_list_names.append('%s' % i.__name__)

    if NETWORK != None:
        for i in bc_arr[:, 0]:
            metric_list_names.append(i)

    ##Save metric names as pickle
    met_list_pickle_path = os.path.dirname(
        os.path.abspath(est_path)) + '/met_list_pickle'
    cPickle.dump(metric_list_names, open(met_list_pickle_path, 'wb'))

    ###############################################################
    ###############################################################
    ###############################################################

    ##Save results to csv
    if 'inv' in est_path:
        if NETWORK != None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_inv_sps_cov.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_inv_sps_cov.csv'
    else:
        if NETWORK != None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_corr.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_corr.csv'
    np.savetxt(out_path, net_met_val_list)

    return out_path
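A small, self-contained check of the proportional thresholding used above (the 4x4 matrix is a toy example; roughly the top thr fraction of off-diagonal weights survives):

import numpy as np

def threshold_proportional(in_mat, thr):
    # same logic as the nested helper above, restated standalone for the demo
    n = len(in_mat)
    np.fill_diagonal(in_mat, 0)
    if np.allclose(in_mat, in_mat.T):
        in_mat[np.tril_indices(n)] = 0
        ud = 2
    else:
        ud = 1
    ind = np.where(in_mat)
    I = np.argsort(in_mat[ind])[::-1]
    en = int(round((n * n - n) * float(thr) / ud))
    in_mat[(ind[0][I][en:], ind[1][I][en:])] = 0
    if ud == 2:
        in_mat[:, :] = in_mat + in_mat.T
    return in_mat

W = np.array([[0.0, 0.9, 0.1, 0.4],
              [0.9, 0.0, 0.8, 0.2],
              [0.1, 0.8, 0.0, 0.3],
              [0.4, 0.2, 0.3, 0.0]])
print(threshold_proportional(W, 0.5))  # only the strongest half of the links survives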
Example #10
def extractnetstats(ID, NETWORK, thr, conn_model, est_path1, out_file=None):
    import pynets
    from pynets import netstats, thresholding

    ##Load and threshold matrix
    in_mat = np.array(np.genfromtxt(est_path1))
    in_mat = thresholding.autofix(in_mat)

    ##Get hyperbolic tangent of matrix if non-sparse (i.e. Fisher r-to-z transform)
    if conn_model == 'corr':
        in_mat = np.arctanh(in_mat)

    ##Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path1))

    ##Assign Weight matrix
    mat_wei = in_mat
    ##Load numpy matrix as networkx graph
    G = nx.from_numpy_matrix(mat_wei)

    ##Create Binary matrix
    #mat_bin = weight_conversion(in_mat, 'binarize')
    ##Load numpy matrix as networkx graph
    #G_bin=nx.from_numpy_matrix(mat_bin)

    ##Create Length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')
    ##Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    ##Save gephi files
    if NETWORK != None:
        nx.write_graphml(G, dir_path + '/' + ID + '_' + NETWORK + '.graphml')
    else:
        nx.write_graphml(G, dir_path + '/' + ID + '.graphml')

    ###############################################################
    ########### Calculate graph metrics from graph G ##############
    ###############################################################
    import random
    import itertools
    from itertools import permutations
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques, transitivity,
                                     betweenness_centrality,
                                     rich_club_coefficient,
                                     eigenvector_centrality,
                                     communicability_centrality)
    from pynets.netstats import (efficiency, global_efficiency,
                                 local_efficiency, create_random_graph,
                                 smallworldness_measure, smallworldness,
                                 modularity)
    ##For non-nodal scalar metrics from the networkx.algorithms library, add the name of the function to metric_list for it to be automatically calculated.
    ##For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the function (with a G-only input) to the netstats module.
    #metric_list = [global_efficiency, local_efficiency, smallworldness, degree_assortativity_coefficient, average_clustering, average_shortest_path_length, degree_pearson_correlation_coefficient, graph_number_of_cliques, transitivity]
    metric_list = [
        global_efficiency, local_efficiency, degree_assortativity_coefficient,
        average_clustering, average_shortest_path_length,
        degree_pearson_correlation_coefficient, graph_number_of_cliques,
        transitivity
    ]

    ##Iteratively run functions from above metric list
    num_mets = len(metric_list)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j=0
    for i in metric_list:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        if NETWORK != None:
            net_met = NETWORK + '_' + met_name
        else:
            net_met = met_name
        try:
            net_met_val = float(i(G))
        except:
            net_met_val = np.nan
        net_met_arr[j,0] = net_met
        net_met_arr[j,1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:,1])

    ##Calculate modularity using the Louvain algorithm
    [community_aff, modularity] = modularity(mat_wei)
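    ##(Note: this rebinding shadows the imported modularity function, which is not
    ##called again below, so the shadowing is harmless here)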

    ##betweenness_centrality
    try:
        bc_vector = betweenness_centrality(G_len)
        print('Extracting Betweenness Centrality vector for all network nodes...')
        bc_vals = list(bc_vector.values())
        bc_nodes = list(bc_vector.keys())
        num_nodes = len(bc_nodes)
        bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j=0
        for i in range(num_nodes):
            if NETWORK != None:
                bc_arr[j,0] = NETWORK + '_' + str(bc_nodes[j]) + '_betw_cent'
                print('\n' + NETWORK + '_' + str(bc_nodes[j]) + '_betw_cent')
            else:
                bc_arr[j,0] = 'WholeBrain_' + str(bc_nodes[j]) + '_betw_cent'
                print('\n' + 'WholeBrain_' + str(bc_nodes[j]) + '_betw_cent')
            try:
                bc_arr[j,1] = bc_vals[j]
            except:
                bc_arr[j,1] = np.nan
            print(str(bc_vals[j]))
            j = j + 1
        bc_val_list = list(bc_arr[:,1])
        bc_arr[num_nodes,0] = (NETWORK or 'WholeBrain') + '_MEAN_betw_cent'
        nonzero_arr_betw_cent = np.delete(bc_arr[:,1], [0])
        bc_arr[num_nodes,1] = np.mean(nonzero_arr_betw_cent)
        print('\n' + 'Mean Betweenness Centrality across all nodes: ' + str(bc_arr[num_nodes,1]) + '\n')
    except:
        print('Betweenness Centrality calculation failed. Skipping...')
        bc_val_list = []
        pass

    ##eigenvector_centrality
    try:
        ec_vector = eigenvector_centrality(G_len)
        print('Extracting Eigenvector Centrality vector for all network nodes...')
        ec_vals = list(ec_vector.values())
        ec_nodes = list(ec_vector.keys())
        num_nodes = len(ec_nodes)
        ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j=0
        for i in range(num_nodes):
            if NETWORK != None:
                ec_arr[j,0] = NETWORK + '_' + str(ec_nodes[j]) + '_eig_cent'
                print('\n' + NETWORK + '_' + str(ec_nodes[j]) + '_eig_cent')
            else:
                ec_arr[j,0] = 'WholeBrain_' + str(ec_nodes[j]) + '_eig_cent'
                print('\n' + 'WholeBrain_' + str(ec_nodes[j]) + '_eig_cent')
            try:
                ec_arr[j,1] = ec_vals[j]
            except:
                ec_arr[j,1] = np.nan
            print(str(ec_vals[j]))
            j = j + 1
        ec_val_list = list(ec_arr[:,1])
        ec_arr[num_nodes,0] = (NETWORK or 'WholeBrain') + '_MEAN_eig_cent'
        nonzero_arr_eig_cent = np.delete(ec_arr[:,1], [0])
        ec_arr[num_nodes,1] = np.mean(nonzero_arr_eig_cent)
        print('\n' + 'Mean Eigenvector Centrality across all nodes: ' + str(ec_arr[num_nodes,1]) + '\n')
    except:
        print('Eigenvector Centrality calculation failed. Skipping...')
        ec_val_list = []
        pass

    ##communicability_centrality
    try:
        cc_vector = communicability_centrality(G_len)
        print('Extracting Communicability Centrality vector for all network nodes...')
        cc_vals = list(cc_vector.values())
        cc_nodes = list(cc_vector.keys())
        num_nodes = len(cc_nodes)
        cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
        j=0
        for i in range(num_nodes):
            if NETWORK != None:
                cc_arr[j,0] = NETWORK + '_' + str(cc_nodes[j]) + '_comm_cent'
                print('\n' + NETWORK + '_' + str(cc_nodes[j]) + '_comm_cent')
            else:
                cc_arr[j,0] = 'WholeBrain_' + str(cc_nodes[j]) + '_comm_cent'
                print('\n' + 'WholeBrain_' + str(cc_nodes[j]) + '_comm_cent')
            try:
                cc_arr[j,1] = cc_vals[j]
            except:
                cc_arr[j,1] = np.nan
            print(str(cc_vals[j]))
            j = j + 1
        cc_val_list = list(cc_arr[:,1])
        cc_arr[num_nodes,0] = (NETWORK or 'WholeBrain') + '_MEAN_comm_cent'
        nonzero_arr_comm_cent = np.delete(cc_arr[:,1], [0])
        cc_arr[num_nodes,1] = np.mean(nonzero_arr_comm_cent)
        print('\n' + 'Mean Communicability Centrality across all nodes: ' + str(cc_arr[num_nodes,1]) + '\n')
    except:
        print('Communicability Centrality calculation failed. Skipping...')
        cc_val_list = []
        pass

    ##rich_club_coefficient
    try:
        rc_vector = rich_club_coefficient(G, normalized=True)
        print('Extracting Rich Club Coefficient vector for all network nodes...')
        rc_vals = list(rc_vector.values())
        rc_edges = list(rc_vector.keys())
        num_edges = len(rc_edges)
        rc_arr = np.zeros([num_edges + 1, 2], dtype='object')
        j=0
        for i in range(num_edges):
            if NETWORK != None:
                rc_arr[j,0] = NETWORK + '_' + str(rc_edges[j]) + '_rich_club'
                print('\n' + NETWORK + '_' + str(rc_edges[j]) + '_rich_club')
            else:
                rc_arr[j,0] = 'WholeBrain_' + str(rc_edges[j]) + '_rich_club'
                print('\n' + 'WholeBrain_' + str(rc_edges[j]) + '_rich_club')
            try:
                rc_arr[j,1] = rc_vals[j]
            except:
                rc_arr[j,1] = np.nan
            print(str(rc_vals[j]))
            j = j + 1
        ##Add mean
        rc_val_list = list(rc_arr[:,1])
        rc_arr[num_edges,0] = (NETWORK or 'WholeBrain') + '_MEAN_rich_club'
        nonzero_arr_rich_club = np.delete(rc_arr[:,1], [0])
        rc_arr[num_edges,1] = np.mean(nonzero_arr_rich_club)
        print('\n' + 'Mean Rich Club Coefficient across all edges: ' + str(rc_arr[num_edges,1]) + '\n')
    except:
        print('Rich Club calculation failed. Skipping...')
        rc_val_list = []
        pass

    ##Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:,0]:
        metric_list_names.append(i)

    ##Add modularity measure
    try:
        if NETWORK != None:
            metric_list_names.append(NETWORK + '_Modularity')
        else:
            metric_list_names.append('WholeBrain_Modularity')
        net_met_val_list_final.append(modularity)
    except:
        pass

    ##Add centrality and rich club measures
    try:
        for i in bc_arr[:,0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(bc_arr[:,1])
    except:
        pass
    try:
        for i in ec_arr[:,0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(ec_arr[:,1])
    except:
        pass
    try:
        for i in cc_arr[:,0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(cc_arr[:,1])
    except:
        pass
    try:
        for i in rc_arr[:,0]:
            metric_list_names.append(i)
        net_met_val_list_final = net_met_val_list_final + list(rc_arr[:,1])
    except:
        pass

    ##Save metric names as pickle
    try:
        import cPickle
    except ImportError:
        import _pickle as cPickle
    if NETWORK != None:
        met_list_picke_path = os.path.dirname(os.path.abspath(est_path1)) + '/met_list_pickle_' + NETWORK
    else:
        met_list_picke_path = os.path.dirname(os.path.abspath(est_path1)) + '/met_list_pickle_WB'
    cPickle.dump(metric_list_names, open(met_list_picke_path, 'wb'))

    ##Save results to csv
    if 'inv' in est_path1:
        if NETWORK != None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_sps_cov_' + str(thr) + '.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_sps_cov_' + str(thr) + '.csv'
    else:
        if NETWORK != None:
            out_path = dir_path + '/' + ID + '_' + NETWORK + '_net_mets_corr_' + str(thr) + '.csv'
        else:
            out_path = dir_path + '/' + ID + '_net_mets_corr_' + str(thr) + '.csv'
    np.savetxt(out_path, net_met_val_list_final)

    return out_path
Example #11
def extractnetstats(ID,
                    network,
                    thr,
                    conn_model,
                    est_path,
                    roi,
                    prune,
                    node_size,
                    norm,
                    binary,
                    custom_weight=None):
    """
    Function interface for performing fully-automated graph analysis.

    Parameters
    ----------
    ID : str
        A subject id or other unique identifier.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    thr : float
        The value, between 0 and 1, used to threshold the graph using any variety of methods
        triggered through other options.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    est_path : str
        File path to the thresholded graph, conn_matrix_thr, saved as a numpy array in .npy format.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROIs.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    custom_weight : float
        The edge attribute that holds the numerical value used as a weight.
        If None, then each edge has weight 1. Default is None.

    Returns
    -------
    out_path : str
        Path to .csv file where graph analysis results are saved.
    """
    import pandas as pd
    import yaml
    try:
        import cPickle as pickle
    except ImportError:
        import _pickle as pickle
    from pathlib import Path
    from pynets import thresholding, utils

    # Advanced options
    fmt = 'edgelist_ssv'
    est_path_fmt = "%s%s" % ('.', est_path.split('.')[-1])

    # Load and threshold matrix
    if est_path_fmt == '.txt':
        in_mat_raw = np.array(np.genfromtxt(est_path))
    else:
        in_mat_raw = np.array(np.load(est_path))

    # De-diagonal
    in_mat = np.array(thresholding.autofix(in_mat_raw))

    # Normalize connectivity matrix
    # Force edges to values between 0-1
    if norm == 1:
        in_mat = thresholding.normalize(in_mat)
    # Apply log10
    elif norm == 2:
        in_mat = np.log10(in_mat)
    else:
        pass

    # Correct nan's and inf's
    in_mat[np.isnan(in_mat)] = 0
    in_mat[np.isinf(in_mat)] = 1

    # Get hyperbolic tangent (i.e. Fisher r-to-z transform) of matrix if non-covariance
    if (conn_model == 'corr') or (conn_model == 'partcorr'):
        in_mat = np.arctanh(in_mat)

    # Binarize graph
    if binary is True:
        in_mat = thresholding.binarize(in_mat)

    # Get dir_path
    dir_path = os.path.dirname(os.path.realpath(est_path))

    # Load numpy matrix as networkx graph
    G_pre = nx.from_numpy_matrix(in_mat)

    # Prune irrelevant nodes (i.e. nodes who are fully disconnected from the graph and/or those whose betweenness
    # centrality are > 3 standard deviations below the mean)
    if prune == 1:
        [G, _] = prune_disconnected(G_pre)
    elif prune == 2:
        [G, _] = most_important(G_pre)
    else:
        G = G_pre

    # Get corresponding matrix
    in_mat = np.array(nx.to_numpy_matrix(G))

    # Saved pruned
    if (prune != 0) and (prune is not None):
        final_mat_path = "%s%s%s" % (est_path.split(est_path_fmt)[0],
                                     '_pruned_mat', est_path_fmt)
        utils.save_mat(in_mat, final_mat_path, fmt)

    # Print graph summary
    print("%s%.2f%s" % ('\n\nThreshold: ', 100 * float(thr), '%'))
    print("%s%s" % ('Source File: ', est_path))
    info_list = list(nx.info(G).split('\n'))[2:]
    for i in info_list:
        print(i)

    if nx.is_connected(G):
        frag = False
        print('Graph is connected...')
    else:
        frag = True
        print('Warning: Graph is fragmented...\n')

    # Create Length matrix
    mat_len = thresholding.weight_conversion(in_mat, 'lengths')

    # Load numpy matrix as networkx graph
    G_len = nx.from_numpy_matrix(mat_len)

    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    # # # # Calculate global and local metrics from graph G # # # #
    # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
    import community
    from networkx.algorithms import (degree_assortativity_coefficient,
                                     average_clustering,
                                     average_shortest_path_length,
                                     degree_pearson_correlation_coefficient,
                                     graph_number_of_cliques, transitivity,
                                     betweenness_centrality,
                                     eigenvector_centrality,
                                     communicability_betweenness_centrality,
                                     clustering, degree_centrality,
                                     rich_club_coefficient, sigma)
    from pynets.stats.netstats import (average_local_efficiency,
                                       global_efficiency, local_efficiency,
                                       participation_coef,
                                       participation_coef_sign,
                                       diversity_coef_sign)
    # For non-nodal scalar metrics from custom functions, add the name of the function to metric_list and add the
    # function (with a G-only input) to the netstats module.
    metric_list_glob = [
        global_efficiency, average_local_efficiency,
        degree_assortativity_coefficient, average_clustering,
        average_shortest_path_length, degree_pearson_correlation_coefficient,
        graph_number_of_cliques, transitivity, sigma
    ]
    metric_list_comm = ['louvain_modularity']
    # with open("%s%s" % (str(Path(__file__).parent), '/global_graph_measures.yaml'), 'r') as stream:
    #     try:
    #         metric_dict_global = yaml.load(stream)
    #         metric_list_global = metric_dict_global['metric_list_global']
    #         print("%s%s%s" % ('\n\nCalculating global measures:\n', metric_list_global, '\n\n'))
    #     except FileNotFoundError:
    #         print('Failed to parse global_graph_measures.yaml')

    with open(
            "%s%s" %
        (str(Path(__file__).parent), '/nodal_graph_measures.yaml'),
            'r') as stream:
        try:
            metric_dict_nodal = yaml.safe_load(stream)
            metric_list_nodal = metric_dict_nodal['metric_list_nodal']
            print("%s%s%s" % ('\n\nCalculating nodal measures:\n',
                              metric_list_nodal, '\n\n'))
        except (yaml.YAMLError, KeyError):
            print('Failed to parse nodal_graph_measures.yaml')

    # Note the use of bare excepts in the blocks that follow. Typically, this is considered bad practice in
    # Python. Here, we exploit it intentionally to facilitate uninterrupted, automated graph analysis even
    # when algorithms are undefined. In those instances, solutions are assigned NaN's.

    # Iteratively run functions from above metric list that generate single scalar output
    num_mets = len(metric_list_glob)
    net_met_arr = np.zeros([num_mets, 2], dtype='object')
    j = 0
    for i in metric_list_glob:
        met_name = str(i).split('<function ')[1].split(' at')[0]
        net_met = met_name
        try:
            try:
                net_met_val = raw_mets(G, i, custom_weight)
            except:
                print("%s%s%s" %
                      ('WARNING: ', net_met, ' failed for graph G.'))
                net_met_val = np.nan
        except:
            print("%s%s%s" %
                  ('WARNING: ', str(i), ' is undefined for graph G'))
            net_met_val = np.nan
        net_met_arr[j, 0] = net_met
        net_met_arr[j, 1] = net_met_val
        print(net_met)
        print(str(net_met_val))
        print('\n')
        j = j + 1
    net_met_val_list = list(net_met_arr[:, 1])

    # Create a list of metric names for scalar metrics
    metric_list_names = []
    net_met_val_list_final = net_met_val_list
    for i in net_met_arr[:, 0]:
        metric_list_names.append(i)

    # Run miscellaneous functions that generate multiple outputs
    # Calculate modularity using the Louvain algorithm
    ci = None  # community affiliation vector; stays None if Louvain is skipped or fails
    if 'louvain_modularity' in metric_list_comm:
        try:
            ci = community.best_partition(G)
            modularity = community.community_louvain.modularity(ci, G)
            metric_list_names.append('modularity')
            net_met_val_list_final.append(modularity)
        except:
            print('Louvain modularity calculation is undefined for graph G')
            pass

    # Participation Coefficient by louvain community
    if 'participation_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError(
                    'Participation coefficient cannot be calculated for graph G in the absence of a '
                    'community affiliation vector')
            if len(in_mat[in_mat < 0.0]) > 0:
                pc_vector = participation_coef_sign(in_mat, ci)
            else:
                pc_vector = participation_coef(in_mat, ci)
            print(
                '\nExtracting Participation Coefficient vector for all network nodes...'
            )
            pc_vals = list(pc_vector)
            pc_edges = list(range(len(pc_vector)))
            num_edges = len(pc_edges)
            pc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            j = 0
            for i in range(num_edges):
                pc_arr[j, 0] = "%s%s" % (str(pc_edges[j]), '_partic_coef')
                try:
                    pc_arr[j, 1] = pc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Participation coefficient is undefined for node ',
                           str(j), ' of graph G'))
                    pc_arr[j, 1] = np.nan
                j = j + 1
            # Add mean
            pc_arr[num_edges, 0] = 'average_participation_coefficient'
            nonzero_arr_partic_coef = np.delete(pc_arr[:, 1], [0])
            pc_arr[num_edges, 1] = np.mean(nonzero_arr_partic_coef)
            print("%s%s" % ('Mean Participation Coefficient across edges: ',
                            str(pc_arr[num_edges, 1])))
            for i in pc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(pc_arr[:,
                                                                          1])
        except:
            print('Participation coefficient cannot be calculated for graph G')
            pass

    # Diversity Coefficient by louvain community
    if 'diversity_coefficient' in metric_list_nodal:
        try:
            if ci is None:
                raise KeyError(
                    'Diversity coefficient cannot be calculated for graph G in the absence of a community '
                    'affiliation vector')
            [dc_vector, _] = diversity_coef_sign(in_mat, ci)
            print(
                '\nExtracting Diversity Coefficient vector for all network nodes...'
            )
            dc_vals = list(dc_vector)
            dc_edges = list(range(len(dc_vector)))
            num_edges = len(dc_edges)
            dc_arr = np.zeros([num_edges + 1, 2], dtype='object')
            j = 0
            for i in range(num_edges):
                dc_arr[j, 0] = "%s%s" % (str(dc_edges[j]), '_diversity_coef')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Diversity coefficient is undefined for node ',
                           str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
                j = j + 1
            # Add mean
            dc_arr[num_edges, 0] = 'average_diversity_coefficient'
            nonzero_arr_diversity_coef = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_edges, 1] = np.mean(nonzero_arr_diversity_coef)
            print("%s%s" % ('Mean Diversity Coefficient across edges: ',
                            str(dc_arr[num_edges, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:,
                                                                          1])
        except:
            print('Diversity coefficient cannot be calculated for graph G')
            pass

    # Local Efficiency
    if 'local_efficiency' in metric_list_nodal:
        try:
            le_vector = local_efficiency(G)
            print(
                '\nExtracting Local Efficiency vector for all network nodes...'
            )
            le_vals = list(le_vector.values())
            le_nodes = list(le_vector.keys())
            num_nodes = len(le_nodes)
            le_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                le_arr[j, 0] = "%s%s" % (str(le_nodes[j]), '_local_efficiency')
                try:
                    le_arr[j, 1] = le_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Local efficiency is undefined for node ',
                                    str(j), ' of graph G'))
                    le_arr[j, 1] = np.nan
                j = j + 1
            le_arr[num_nodes, 0] = 'average_local_efficiency_nodewise'
            nonzero_arr_le = np.delete(le_arr[:, 1], [0])
            le_arr[num_nodes, 1] = np.mean(nonzero_arr_le)
            print("%s%s" % ('Mean Local Efficiency across nodes: ',
                            str(le_arr[num_nodes, 1])))
            for i in le_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(le_arr[:,
                                                                          1])
        except:
            print('Local efficiency cannot be calculated for graph G')
            pass

    # Local Clustering
    if 'local_clustering' in metric_list_nodal:
        try:
            cl_vector = clustering(G)
            print(
                '\nExtracting Local Clustering vector for all network nodes...'
            )
            cl_vals = list(cl_vector.values())
            cl_nodes = list(cl_vector.keys())
            num_nodes = len(cl_nodes)
            cl_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                cl_arr[j, 0] = "%s%s" % (str(cl_nodes[j]), '_local_clustering')
                try:
                    cl_arr[j, 1] = cl_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Local clustering is undefined for node ',
                                    str(j), ' of graph G'))
                    cl_arr[j, 1] = np.nan
                j = j + 1
                cl_arr[num_nodes, 0] = 'average_local_clustering_nodewise'
            nonzero_arr_cl = np.delete(cl_arr[:, 1], [0])
            cl_arr[num_nodes, 1] = np.mean(nonzero_arr_cl)
            print("%s%s" % ('Mean Local Clustering across nodes: ',
                            str(cl_arr[num_nodes, 1])))
            for i in cl_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cl_arr[:,
                                                                          1])
        except:
            print('Local clustering cannot be calculated for graph G')
            pass

    # Degree centrality
    if 'degree_centrality' in metric_list_nodal:
        try:
            dc_vector = degree_centrality(G)
            print(
                '\nExtracting Degree Centrality vector for all network nodes...'
            )
            dc_vals = list(dc_vector.values())
            dc_nodes = list(dc_vector.keys())
            num_nodes = len(dc_nodes)
            dc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                dc_arr[j,
                       0] = "%s%s" % (str(dc_nodes[j]), '_degree_centrality')
                try:
                    dc_arr[j, 1] = dc_vals[j]
                except:
                    print(
                        "%s%s%s" % ('Degree centrality is undefined for node ',
                                    str(j), ' of graph G'))
                    dc_arr[j, 1] = np.nan
                j = j + 1
            dc_arr[num_nodes, 0] = 'average_degree_cent'
            nonzero_arr_dc = np.delete(dc_arr[:, 1], [0])
            dc_arr[num_nodes, 1] = np.mean(nonzero_arr_dc)
            print("%s%s" % ('Mean Degree Centrality across nodes: ',
                            str(dc_arr[num_nodes, 1])))
            for i in dc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(dc_arr[:,
                                                                          1])
        except:
            print('Degree centrality cannot be calculated for graph G')
            pass

    # Betweenness Centrality
    if 'betweenness_centrality' in metric_list_nodal:
        try:
            bc_vector = betweenness_centrality(G_len, normalized=True)
            print(
                '\nExtracting Betweenness Centrality vector for all network nodes...'
            )
            bc_vals = list(bc_vector.values())
            bc_nodes = list(bc_vector.keys())
            num_nodes = len(bc_nodes)
            bc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            j = 0
            for i in range(num_nodes):
                bc_arr[j, 0] = "%s%s" % (str(
                    bc_nodes[j]), '_betweenness_centrality')
                try:
                    bc_arr[j, 1] = bc_vals[j]
                except:
                    print("%s%s%s" %
                          ('Betweeness centrality is undefined for node ',
                           str(j), ' of graph G'))
                    bc_arr[j, 1] = np.nan
                j = j + 1
            bc_arr[num_nodes, 0] = 'average_betweenness_centrality'
            nonzero_arr_betw_cent = np.delete(bc_arr[:, 1], [0])
            bc_arr[num_nodes, 1] = np.mean(nonzero_arr_betw_cent)
            print("%s%s" % ('Mean Betweenness Centrality across nodes: ',
                            str(bc_arr[num_nodes, 1])))
            for i in bc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(bc_arr[:,
                                                                          1])
        except:
            print('Betweenness centrality cannot be calculated for graph G')
            pass

    # Eigenvector Centrality
    if 'eigenvector_centrality' in metric_list_nodal:
        try:
            ec_vector = eigenvector_centrality(G, max_iter=1000)
            print(
                '\nExtracting Eigenvector Centrality vector for all network nodes...'
            )
            ec_vals = list(ec_vector.values())
            ec_nodes = list(ec_vector.keys())
            num_nodes = len(ec_nodes)
            ec_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                ec_arr[j, 0] = "%s%s" % (str(ec_nodes[j]),
                                         '_eigenvector_centrality')
                try:
                    ec_arr[j, 1] = ec_vals[j]
                except IndexError:
                    print("%s%s%s" %
                          ('Eigenvector centrality is undefined for node ',
                           str(j), ' of graph G'))
                    ec_arr[j, 1] = np.nan
            ec_arr[num_nodes, 0] = 'average_eigenvector_centrality'
            # Average across the node-level rows only; the final row is
            # reserved for the mean itself
            ec_arr[num_nodes, 1] = np.mean(ec_arr[0:num_nodes, 1])
            print("%s%s" % ('Mean Eigenvector Centrality across nodes: ',
                            str(ec_arr[num_nodes, 1])))
            for i in ec_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(ec_arr[:, 1])
        except Exception:
            print('Eigenvector centrality cannot be calculated for graph G')

    # Communicability Centrality
    if 'communicability_centrality' in metric_list_nodal:
        try:
            # Computed as communicability betweenness centrality, but
            # labeled 'communicability_centrality' in the output
            cc_vector = communicability_betweenness_centrality(G,
                                                               normalized=True)
            print(
                '\nExtracting Communicability Centrality vector for all network nodes...'
            )
            cc_vals = list(cc_vector.values())
            cc_nodes = list(cc_vector.keys())
            num_nodes = len(cc_nodes)
            cc_arr = np.zeros([num_nodes + 1, 2], dtype='object')
            for j in range(num_nodes):
                cc_arr[j, 0] = "%s%s" % (str(cc_nodes[j]),
                                         '_communicability_centrality')
                try:
                    cc_arr[j, 1] = cc_vals[j]
                except IndexError:
                    print("%s%s%s" %
                          ('Communicability centrality is undefined for node ',
                           str(j), ' of graph G'))
                    cc_arr[j, 1] = np.nan
            cc_arr[num_nodes, 0] = 'average_communicability_centrality'
            # Average across the node-level rows only; the final row is
            # reserved for the mean itself
            cc_arr[num_nodes, 1] = np.mean(cc_arr[0:num_nodes, 1])
            print("%s%s" % ('Mean Communicability Centrality across nodes: ',
                            str(cc_arr[num_nodes, 1])))
            for i in cc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(cc_arr[:, 1])
        except Exception:
            print(
                'Communicability centrality cannot be calculated for graph G')

    # Rich club coefficient
    if 'rich_club_coefficient' in metric_list_nodal:
        try:
            rc_vector = rich_club_coefficient(G, normalized=True)
            print(
                '\nExtracting Rich Club Coefficient vector for all network degree values...'
            )
            rc_vals = list(rc_vector.values())
            # rich_club_coefficient returns a dict keyed by degree value,
            # not by node or edge
            rc_degrees = list(rc_vector.keys())
            num_degrees = len(rc_degrees)
            rc_arr = np.zeros([num_degrees + 1, 2], dtype='object')
            for j in range(num_degrees):
                rc_arr[j, 0] = "%s%s" % (str(rc_degrees[j]), '_rich_club')
                try:
                    rc_arr[j, 1] = rc_vals[j]
                except IndexError:
                    print("%s%s%s" %
                          ('Rich club coefficient is undefined for degree ',
                           str(j), ' of graph G'))
                    rc_arr[j, 1] = np.nan
            # Add mean
            rc_arr[num_degrees, 0] = 'average_rich_club_coefficient'
            # Average across the degree-level rows only; the final row is
            # reserved for the mean itself
            rc_arr[num_degrees, 1] = np.mean(rc_arr[0:num_degrees, 1])
            print("%s%s" % ('Mean Rich Club Coefficient across degree values: ',
                            str(rc_arr[num_degrees, 1])))
            for i in rc_arr[:, 0]:
                metric_list_names.append(i)
            net_met_val_list_final = net_met_val_list_final + list(rc_arr[:, 1])
        except Exception:
            print('Rich club coefficient cannot be calculated for graph G')

    if roi:
        met_list_pickle_path = "%s%s%s%s" % (
            os.path.dirname(os.path.abspath(est_path)), '/net_met_list', "%s" %
            ("%s%s%s" % ('_', network, '_') if network else "_"),
            os.path.basename(roi).split('.')[0])
    elif network:
        met_list_pickle_path = "%s%s%s" % (os.path.dirname(
            os.path.abspath(est_path)), '/net_met_list_', network)
    else:
        met_list_pickle_path = "%s%s" % (os.path.dirname(
            os.path.abspath(est_path)), '/net_met_list')
    with open(met_list_pickle_path, 'wb') as f:
        pickle.dump(metric_list_names, f, protocol=2)

    # And save results to csv
    out_path = utils.create_csv_path(ID, network, conn_model, thr, roi,
                                     dir_path, node_size)
    np.savetxt(out_path, net_met_val_list_final, delimiter='\t')

    if frag is True:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_frag_neat.csv')
    else:
        out_path_neat = "%s%s" % (out_path.split('.csv')[0], '_neat.csv')
    df = pd.DataFrame.from_dict(dict(
        zip(metric_list_names, net_met_val_list_final)),
                                orient='index').transpose()
    df.to_csv(out_path_neat, index=False)

    return out_path
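
Each metric block above repeats the same pattern: take a networkx dict of
per-node values, lay it out in a (num_nodes + 1, 2) object array with one
labeled row per node, append the mean, and extend the running name and
value lists. A minimal sketch of that pattern factored into a helper; the
name summarize_nodal_metric is illustrative and not part of the function
above:

import numpy as np
import networkx as nx
from networkx.algorithms import degree_centrality


def summarize_nodal_metric(metric_dict, suffix):
    # Label each per-node value and append the mean, mirroring the
    # (num_nodes + 1, 2) array layout used above
    names = ["%s%s" % (str(node), suffix) for node in metric_dict]
    vals = list(metric_dict.values())
    names.append("average%s" % suffix)
    vals.append(float(np.mean(vals)))
    return names, vals


G = nx.karate_club_graph()
names, vals = summarize_nodal_metric(degree_centrality(G),
                                     '_degree_centrality')
print(names[-1], vals[-1])  # the 'average_degree_centrality' row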
Example #12
def betweenness_centrality(G,
                           k=None,
                           normalized=True,
                           weight=None,
                           endpoints=False,
                           seed=None):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$

    .. math::

       c_B(v) = \sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
    those paths passing through some node $v$ other than $s, t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in \{s, t\}$,
    $\sigma(s, t|v) = 0$ [2]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    k : int, optional (default=None)
      If k is not None, use k node samples to estimate betweenness.
      The value of k <= n where n is the number of nodes in the graph.
      Higher values give better approximation.

    normalized : bool, optional
      If True the betweenness values are normalized by `2/((n-1)(n-2))`
      for undirected graphs, and `1/((n-1)(n-2))` for directed graphs
      where `n` is the number of nodes in G.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.
      Weights are used to calculate weighted shortest paths, so they are
      interpreted as distances.

    endpoints : bool, optional
      If True include the endpoints in the shortest path counts.

    seed : integer, random_state, or None (default=None)
      Indicator of random number generation state.
      Only used if k is not None.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.
    See [4]_ for the original first published version and [2]_ for details on
    algorithms for variations and related metrics.

    For approximate betweenness calculations set k=#samples to use
    k nodes ("pivots") to estimate the betweenness values. For an estimate
    of the number of pivots needed see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    The total number of paths between source and target is counted
    differently for directed and undirected graphs. Directed paths
    are easy to count. Undirected paths are tricky: should a path
    from "u" to "v" count as 1 undirected path or as 2 directed paths?

    For betweenness_centrality we report the number of undirected
    paths when G is undirected.

    For betweenness_centrality_subset the reporting is different.
    If the source and target subsets are the same, then we want
    to count undirected paths. But if the source and target subsets
    differ -- for example, if sources is {0} and targets is {1},
    then we are only counting the paths in one direction. They are
    undirected paths but we are counting them in a directed way.
    To count them as undirected paths, each should count as half a path.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35–41, 1977
       https://doi.org/10.2307/3033543
    """
    @context_to_dict
    @project_to_simple
    def _betweenness_centrality(G,
                                k=None,
                                normalized=True,
                                weight=None,
                                endpoints=False,
                                seed=None):
        # 'weight' selects the generic (weighted) variant of the built-in
        # app; 'k' and 'seed' are accepted for API compatibility but are
        # not forwarded to it
        algorithm = "betweenness_centrality"
        if weight is not None:
            algorithm = "betweenness_centrality_generic"
        return AppAssets(algo=algorithm,
                         context="vertex_data")(G,
                                                normalized=normalized,
                                                endpoints=endpoints)

    if not isinstance(G, nx.Graph) or seed is not None:
        return nxa.betweenness_centrality(G, k, normalized, weight, endpoints,
                                          seed)
    return _betweenness_centrality(G,
                                   k=k,
                                   normalized=normalized,
                                   weight=weight,
                                   endpoints=endpoints,
                                   seed=seed)
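
The wrapper above falls back to the reference NetworkX implementation
whenever G is not a NetworkX graph or a seed is supplied. A minimal usage
sketch against plain NetworkX; the values noted in comments are
approximate:

import networkx as nx
from networkx.algorithms import betweenness_centrality

G = nx.path_graph(4)  # 0 - 1 - 2 - 3

# Exact, normalized betweenness: the interior nodes broker all traffic
exact = betweenness_centrality(G, normalized=True)
# -> {0: 0.0, 1: 0.667, 2: 0.667, 3: 0.0}

# Pivot-based approximation: sample k source nodes; seed makes the
# sample reproducible (and, in the wrapper above, any non-None seed
# routes the call to the NetworkX fallback)
approx = betweenness_centrality(G, k=2, seed=42)

# Edge weights are interpreted as distances for shortest-path counting
H = nx.Graph()
H.add_weighted_edges_from([(0, 1, 1.0), (1, 2, 1.0), (0, 2, 5.0)])
weighted = betweenness_centrality(H, weight='weight')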
Example #14
    with open("../data/wallet.json", "r") as file:
        data = load(file)

    for package, deps in data.items():
        dep_graph.add_node(package)
        for dep in deps:
            dep_graph.add_edge(package, dep)

    dep_graph.remove_node("blockchain/unused-My-Wallet")
    return dep_graph


if __name__ == "__main__":
    graph = build_complete_graph()

    betw = betweenness_centrality(graph)
    betw = sorted(betw.items(), key=lambda x: x[1], reverse=True)

    with open("../data/wallet_url_improved.json", "r") as file:
        urls = load(file)

    results = []
    for package, betw_score in tqdm(betw[100:200]):
        cs = 0 if package not in urls else fetch_criticality(package)
        results.append({"name": package, "betw": betw_score, "cs": cs})

    with open("../data/wallet_criticality_100-200.json", "w") as file:
        dump(results, file, indent=4)

    # nx.write_gml(graph, "../data/dep_graph.gml")
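
fetch_criticality is called above but not defined in this snippet. A
minimal hypothetical stand-in, assuming scores were pre-fetched into a
local JSON file; the path and schema here are illustrative, not from the
original project:

from json import load


def fetch_criticality(package, score_file="../data/criticality_cache.json"):
    # Hypothetical stand-in: look up a cached criticality score,
    # defaulting to 0.0 for unknown packages
    with open(score_file, "r") as fh:
        scores = load(fh)
    return scores.get(package, 0.0)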