def clustering_coefficient_and_path_length(G):
    """Summarise a graph by mean clustering and mean component path length.

    Parameters
    ----------
    G : graph
        Graph accepted by ``clustering``, ``nx.connected_components`` and
        ``average_shortest_path_length``.

    Returns
    -------
    tuple
        ``(mean_abs_clustering, mean_path_length)`` where the first entry is
        the mean of the absolute per-node clustering values and the second is
        the mean, over connected components, of each component's average
        shortest path length.
    """
    per_node_clustering = clustering(G)
    # Absolute values are averaged, so the sign of a coefficient is ignored.
    mean_abs_clustering = np.mean(
        [abs(coefficient) for coefficient in per_node_clustering.values()])

    # Path length is evaluated per connected component (on an independent
    # copy of the induced subgraph) and then averaged across components.
    component_path_lengths = [
        average_shortest_path_length(G.subgraph(component_nodes).copy())
        for component_nodes in nx.connected_components(G)
    ]
    mean_path_length = np.mean(component_path_lengths)

    return mean_abs_clustering, mean_path_length
def calc_graph_measures(data_matrix, thresh=0):
    """Build a graph from an adjacency matrix and return summary measures.

    Entries whose absolute value is below ``thresh`` are treated as zero.
    The thresholding is applied to a copy, so ``data_matrix`` itself is left
    untouched.

    Parameters
    ----------
    data_matrix : numpy.ndarray or pandas.DataFrame
        Adjacency matrix. For an ndarray only the real part is used.
    thresh : number, optional
        Absolute-value cut-off below which entries are zeroed (default 0).

    Returns
    -------
    dict
        Keys: ``'degree'``, ``'eccentricity'``, ``'global_efficiency'``,
        ``'characteristic_path_length'``, ``'betweenness_centrality'``,
        ``'clustering_coefficient'``, ``'modularity'``. Per-node measures
        (degree, eccentricity, betweenness) are averaged over nodes.

    Raises
    ------
    TypeError
        If ``data_matrix`` is neither an ndarray nor a DataFrame.
    """
    # Validate first so unsupported input fails with a clear message instead
    # of an unbound ``graph`` variable further down.
    if not isinstance(data_matrix, (np.ndarray, pd.DataFrame)):
        raise TypeError(
            "data_matrix must be a numpy.ndarray or pandas.DataFrame, not "
            + type(data_matrix).__name__)

    # NOTE(review): ``from_numpy_matrix`` and ``performance`` are not
    # available in NetworkX >= 3.0 -- confirm the pinned NetworkX version.
    from networkx import eccentricity
    from networkx.algorithms.efficiency import global_efficiency
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
    from networkx.algorithms.centrality import betweenness_centrality
    from networkx.algorithms.cluster import average_clustering
    from networkx.algorithms.community.modularity_max import greedy_modularity_communities
    from networkx.algorithms.community.quality import performance

    def _avg_values(results):
        """Average a ``{node: value}`` dict or a list of ``(node, value)`` pairs."""
        if isinstance(results, dict):
            values = list(results.values())
        elif isinstance(results, list):
            values = [pair[1] for pair in results]
        else:
            values = []
        return np.mean(values)

    # Work on a copy: the original code zeroed entries in the caller's data.
    thresholded = data_matrix.copy()
    thresholded[np.abs(thresholded) < thresh] = 0

    if isinstance(thresholded, np.ndarray):
        graph = networkx.convert_matrix.from_numpy_matrix(np.real(thresholded))
    else:
        graph = networkx.convert_matrix.from_pandas_adjacency(thresholded)

    degree = list(graph.degree)
    global_eff = global_efficiency(graph)
    b_central = betweenness_centrality(graph)
    # Stored under 'modularity', but the value is the partition
    # ``performance`` score of the greedy-modularity communities.
    modularity = performance(graph, greedy_modularity_communities(graph))

    # The following measures raise NetworkXError on some graphs (presumably
    # disconnected or empty ones); fall back to 0 in that case.
    try:
        ecc = eccentricity(graph)
    except networkx.exception.NetworkXError:
        ecc = [(0, 0)]  # list-of-pairs form so _avg_values yields 0
    try:
        clust = average_clustering(graph)
    except networkx.exception.NetworkXError:
        clust = 0
    try:
        char_path = average_shortest_path_length(graph)
    except networkx.exception.NetworkXError:
        char_path = 0

    graph_dict = {'degree': _avg_values(degree),
                  'eccentricity': _avg_values(ecc),
                  'global_efficiency': global_eff,
                  'characteristic_path_length': char_path,
                  'betweenness_centrality': _avg_values(b_central),
                  'clustering_coefficient': clust,
                  'modularity': modularity}
    return graph_dict
def network_structure_calculations(sn):
    """Compute basic structural statistics for the graph held in ``sn.g``.

    Parameters
    ----------
    sn : object
        Any object exposing the graph as attribute ``g``.

    Returns
    -------
    tuple
        ``(transitivity, average_clustering, connected_components,
        size_biggest_component, ave_short_path_biggest)``. The last two
        entries are ``-1`` when the graph has no connected components.
    """
    g = sn.g
    _transitivity = transitivity(g)
    _average_clustering = average_clustering(g)

    # Node sets of every connected component. ``connected_components`` plus
    # ``subgraph`` replaces ``connected_component_subgraphs``, which newer
    # NetworkX releases no longer provide.
    components = list(nx.connected_components(g))
    connected_components = len(components)

    size_biggest_component = -1
    ave_short_path_biggest = -1
    if components:
        # ``max`` keeps the first component among equally sized ones, the same
        # tie-break as a strict ``>`` scan. The path length is computed once,
        # for the winner only, rather than for every intermediate candidate.
        biggest = max(components, key=len)
        size_biggest_component = len(biggest)
        ave_short_path_biggest = average_shortest_path_length(
            g.subgraph(biggest))

    return (_transitivity, _average_clustering, connected_components,
            size_biggest_component, ave_short_path_biggest)
def graph_stats(G):
    """Compute the graph-related statistics used as features.

    All measures are computed with ``weight='weight'``.

    NOTE(review): the previous docstring stated that the graph is always
    fully connected and that bctpy implementations are used; no bctpy call
    is visible in this function -- confirm and update if needed.

    Parameters
    ----------
    G : graph
        Graph whose edges carry a ``'weight'`` attribute.

    Returns
    -------
    numpy.ndarray
        1-D array: per-node clustering (ordered by node), per-node
        betweenness centrality (ordered by node), per-node eccentricities
        (ordered by source node), followed by the graph diameter, the graph
        radius and the average shortest path length.
    """
    # Local measures
    clustering_dict = clustering(G, weight='weight')
    betweenness_centrality_dict = betweenness_centrality(G, weight='weight')

    # Assumes ``shortest_path_length`` yields ``(source, {target: distance})``
    # pairs -- TODO confirm for the NetworkX version in use. Sorting on the
    # source alone keeps the order aligned with the sorted dicts above and
    # never falls back to comparing the distance dicts.
    paths = shortest_path_length(G, weight='weight')
    eccentricities = [
        max(dists.values())
        for (source, dists) in sorted(paths, key=lambda entry: entry[0])
    ]

    local_measures = np.concatenate(
        [[v for (k, v) in sorted(clustering_dict.items())],
         [v for (k, v) in sorted(betweenness_centrality_dict.items())],
         eccentricities])

    # Global measures
    graph_diameter = max(eccentricities)
    graph_radius = min(eccentricities)
    aspl = average_shortest_path_length(G, weight='weight')
    global_measures = np.array([graph_diameter, graph_radius, aspl])

    return np.concatenate([local_measures, global_measures])
def _get_APL(G):
    """Return the average path length of ``G``.

    Thin wrapper that delegates to ``average_shortest_path_length`` with its
    default arguments.
    """
    average_path_length = average_shortest_path_length(G)
    return average_path_length
import graph_tool
import graph_tool.all as gt
from networkx.generators.random_graphs import barabasi_albert_graph
from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
from networkx.readwrite.gml import write_gml
import numpy as np
import time

# Benchmark script: prints the CPU time (``time.process_time``) taken by three
# ways of obtaining the average shortest path length of the same graph:
#   1. NetworkX ``average_shortest_path_length``
#   2. graph-tool ``shortest_distance`` + ``vertex_average``
#   3. graph-tool ``shortest_distance`` summed in pure Python
# Only the timings are printed; the computed path lengths are discarded.
if __name__ == '__main__':
    # Random Barabasi-Albert graph generated with arguments (10**4, 1).
    g = barabasi_albert_graph(int(1e4), 1)

    # (1) NetworkX timing.
    start_t = time.process_time()
    average_shortest_path_length(g)
    print(time.process_time() - start_t)

    # Hand the same graph over to graph-tool via a GML file in the current
    # working directory; from here on ``g`` is the graph-tool graph.
    write_gml(g, './graph.gml')
    g = gt.load_graph('./graph.gml')

    # (2) graph-tool all-pairs distances, averaged with vertex_average.
    start_t = time.process_time()
    all_sp = gt.shortest_distance(g)
    vertex_avgs = graph_tool.stats.vertex_average(g, all_sp)
    # avg_path is computed only so that the work is included in the timing.
    avg_path = np.sum(vertex_avgs[0]) / (g.num_vertices() - 1)
    print(time.process_time() - start_t)

    # (3) graph-tool all-pairs distances, summed with Python-level loops and
    # divided by the number of ordered vertex pairs, n**2 - n.
    start_t = time.process_time()
    sum([sum(i) for i in gt.shortest_distance(g) ]) / (g.num_vertices()**2 - g.num_vertices())
    print(time.process_time() - start_t)
def exp(arg0, arg1, arg2):
    """Run one coupling-removal experiment and append its results to a file.

    Two graphs are created with ``create_graph('ba', 5000, 4)`` and coupled
    with ``create_couplings_121``. Every coupling gets a score derived from
    the degrees of its two endpoints; couplings are then removed in 60 rounds
    of 1% each, following the score ranking. After each round the graphs are
    merged and ``"<pc> <average path length>"`` is appended to
    ``<abspath('..\\..')>\\exp2\\<title>.txt``.

    Parameters
    ----------
    arg0 : str
        Experiment title; printed and used as the output file name.
    arg1 : int
        Score of a coupling ``(k, v[0])``: ``0`` = degree sum, ``1`` = degree
        product, ``2`` = absolute degree difference. Any other value leaves
        the ranking empty, so no coupling is ever removed.
    arg2 : int
        Removal order: ``0`` = lowest score first, ``1`` = highest first.

    Raises
    ------
    ValueError
        If ``arg2`` is neither 0 nor 1. Previously this surfaced only after
        the graphs were built, as a TypeError from slicing a dict.
    """
    if arg2 not in (0, 1):
        raise ValueError("arg2 must be 0 (ascending) or 1 (descending), got %r"
                         % (arg2,))

    title = arg0
    print(title)
    path = os.path.abspath('..\\..')+'\\exp2\\'

    # ``with`` guarantees the file is closed even if a later step raises.
    with open(path+title+'.txt', 'a+') as f:
        num_nodes = 5000
        degree = 4
        g1 = create_graph('ba', num_nodes, degree)
        g2 = create_graph('ba', num_nodes, degree)
        inter = create_couplings_121(g1, g2)

        # Choose how the two endpoint degrees are combined into a score.
        if arg1 == 0:
            combine = lambda a, b: a + b
        elif arg1 == 1:
            combine = lambda a, b: a * b
        elif arg1 == 2:
            combine = lambda a, b: abs(a - b)
        else:
            combine = None

        scores = {}
        if combine is not None:
            for k, v in inter.items():
                scores[k] = combine(g1.degree(k), g2.degree(v[0]))

        # ``key=`` gives the same stable ordering as the former ``cmp``-style
        # comparison and is valid on both Python 2 and Python 3.
        myInter = sorted(scores.items(), key=lambda kv: kv[1],
                         reverse=(arg2 == 1))

        # Number of couplings removed per round: 1% of the initial couplings.
        p = int(1 / 100.0 * len(inter))
        for i in range(60):
            for item in myInter[i*p:(i+1)*p]:
                inter.pop(item[0])
            pc = findpc(g1, g2, inter)

            # Merge both graphs; the offset assumes ``disjoint_union``
            # relabels g2's nodes to start at ``num_nodes`` -- TODO confirm.
            g3 = disjoint_union(g1, g2)
            for i1, i22 in inter.items():
                for i2 in i22:
                    g3.add_edge(i1, i2+num_nodes)

            # Drop isolated nodes before measuring path length.
            singleNodes = [node for node in g3 if g3.degree(node) == 0]
            g3.remove_nodes_from(singleNodes)

            try:
                pathLen = average_shortest_path_length(g3)
            except Exception:
                # Deliberate best-effort: rounds for which the path length
                # cannot be computed are skipped without writing a line.
                continue

            result = str(pc)+" "+str(pathLen)
            print("%s %s %s" % (title, result, i))
            f.write(result+'\n')
def convert_graph_to_node_frequence(G, weight=None) -> dict:
    """Convert the edges of ``G`` into per-node frequency values.

    Every node starts at frequency 1. For each connected component (of the
    undirected view of ``G``) with more than one node, a component budget is
    derived from the unweighted average shortest path length and shared out
    over the component's edges in proportion to ``log1p`` of their weights.
    Each edge's share is added to both of its endpoints.

    Parameters
    ----------
    G : graph
        NetworkX graph; multigraphs use the mean weight of parallel edges.
    weight : hashable, optional
        Edge attribute holding the weight. Edges without that attribute
        count as weight 1.

    Returns
    -------
    dict
        Mapping ``node -> frequency``.

    Raises
    ------
    TypeError
        If a weight (or the mean of parallel weights) is not ``int``/``float``.
    ValueError
        If a weight is negative, or all weights in a component are zero.
    """
    from statistics import mean
    from math import log1p
    from networkx.classes import function as nx_cls_func
    from networkx.algorithms.components.connected import connected_components
    from networkx.algorithms.shortest_paths.generic import average_shortest_path_length

    def _checked_log1p(edge_weights, negative_message):
        """Validate one weight value and return ``log1p`` of it."""
        if not isinstance(edge_weights, (int, float)):
            raise TypeError(
                f"The weight value must be 'int' or 'float', not '{edge_weights.__class__.__name__}'"
            )
        if edge_weights < 0:
            raise ValueError(negative_message)
        return log1p(edge_weights)

    node_frequency = dict.fromkeys(G.nodes, 1)
    if nx_cls_func.is_empty(G):
        return node_frequency

    for component_nodes in connected_components(nx_cls_func.to_undirected(G)):
        component_size = len(component_nodes)
        if component_size <= 1:
            continue

        component = nx_cls_func.subgraph(G, component_nodes)
        # Unweighted average shortest distance; minus 1 to subtract the node
        # itself; divided by 2 because the two endpoints of an edge share
        # the weight equally.
        component_budget = (
            average_shortest_path_length(component) - 1
        ) * component_size / 2

        if component.is_multigraph():
            # ``dict.fromkeys`` collapses parallel edges to one (u, v) entry.
            log_weight_by_edge = {}
            for edge in dict.fromkeys(component.edges()):
                parallel_mean = mean(
                    attributes.get(weight, 1)
                    for attributes in component.get_edge_data(*edge).values())
                log_weight_by_edge[edge] = _checked_log1p(
                    parallel_mean,
                    'The weight mean of parallel edges cannot be negative.')
        else:
            log_weight_by_edge = {
                edge: _checked_log1p(
                    component.edges[edge].get(weight, 1),
                    'The weight value cannot be negative.')
                for edge in component.edges()
            }

        log_weight_total = sum(log_weight_by_edge.values())
        if log_weight_total <= 0:
            raise ValueError('The weights cannot all be zero.')

        for edge, log_weight in log_weight_by_edge.items():
            edge_share = (log_weight / log_weight_total) * component_budget
            node_frequency[edge[0]] += edge_share
            node_frequency[edge[1]] += edge_share

    return node_frequency
def max_degree(g):
    """Return the maximum total degree of ``g``.

    Reads the second-to-last entry of the second array returned by
    ``gt_stats.vertex_hist(g, 'total')`` -- presumably the bin edges, whose
    last entry lies one past the largest degree; confirm against the
    graph-tool documentation.
    """
    return gt_stats.vertex_hist(g, 'total')[1][-2]


def page_rank(g):
    """Return the PageRank values of ``g`` as an array."""
    return gt.pagerank(g).get_array()


def variance(g):
    """Return the variance of the total-degree distribution of ``g``.

    Computed as ``E[k**2] - E[k]**2``: the degree histogram is normalised by
    the number of vertices, and ``avg_degree(g)`` supplies ``E[k]``.
    """
    degree_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    second_m = np.sum(degree_hist * (np.arange(len(degree_hist)) ** 2))
    return second_m - avg_degree(g) ** 2


if __name__ == '__main__':
    # NetworkX reference values for a random Barabasi-Albert graph.
    nx_g = barabasi_albert_graph(int(1e3), 2)
    n_nodes = nx_g.number_of_nodes()

    nx_apl = average_shortest_path_length(nx_g)
    nx_ad = 2 * nx_g.number_of_edges() / n_nodes
    nx_gcc = transitivity(nx_g)
    nx_lcc = average_clustering(nx_g)

    # The degree histogram is needed several times; compute it once.
    degree_counts = np.array(degree_histogram(nx_g))
    nx_md = len(degree_counts) - 1

    # NOTE(review): ``connected_component_subgraphs`` is unavailable in
    # NetworkX >= 2.4 -- confirm the pinned version.
    nx_drogc = max(connected_component_subgraphs(nx_g),
                   key=len).number_of_nodes() / n_nodes

    # Normalised degree distribution P(k).
    nx_dh = degree_counts / n_nodes

    # The second moment must use P(k), not the raw counts (as ``variance``
    # above does); otherwise it is inflated by a factor of the node count.
    second_m = np.sum(nx_dh * (np.arange(len(nx_dh)) ** 2))
    # NOTE(review): this is the square root of the variance, whereas
    # ``variance(g)`` returns the variance itself -- confirm which is meant.
    nx_v = math.sqrt(second_m - nx_ad ** 2)

    nx_ap = degree_pearson_correlation_coefficient(nx_g)

    # Average neighbour degree ordered by ascending degree (same order as
    # the former sort-descending-then-flip).
    nx_aknn = np.array(
        [avg for _, avg in sorted(average_degree_connectivity(nx_g).items())]
    )
def average_shortest_path_length(self):
    """Return the average shortest path length of ``self._network``.

    Inside this body the name ``average_shortest_path_length`` resolves to
    the module-level function of the same name (assuming this ``def`` is a
    method of a class), so the call delegates rather than recurses.
    """
    network = self._network
    return average_shortest_path_length(network)