def smallWorldness(graph):
    return_values = []
    # Small-worldness criteria
    n = len(nx.nodes(graph))
    e = len(nx.edges(graph))
    # probability of edges: (number of edges in real graph)/possible edges
    p = e / float((n * (n - 1) / 2.0))
    # generate random graph using probability
    rand_graph = nx.fast_gnp_random_graph(n, p, seed=1)
    # calculate values for real graph and random graph
    Creal = nx.transitivity(graph)  # float
    Crand = nx.transitivity(rand_graph)  # float
    Lreal = 0
    Lrand = 0
    real_sum = 0
    rand_sum = 0
    splReal = shortest_path_lengths(graph)
    splRand = shortest_path_lengths(rand_graph)
    for i in range(len(splReal)):
        real_sum += splReal[i]
        rand_sum += splRand[i]
    Lreal = real_sum / len(splReal)
    Lrand = rand_sum / len(splRand)
    # compare with actual graph
    if Lreal != 0 and Lrand != 0 and Crand != 0:
        S = (Creal / Crand) / (float(Lreal) / Lrand)
    else:
        S = 0
    return_values.append(S)
    return return_values
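# NOTE: `smallWorldness` relies on an external helper `shortest_path_lengths`
# that is not shown here. A minimal sketch consistent with its usage above
# (a flat, indexable sequence of all finite pairwise distances) might look
# like this on NetworkX 2.x; the original helper may differ.
def shortest_path_lengths(graph):
    lengths = []
    for source, targets in nx.shortest_path_length(graph):
        for target, d in targets.items():
            if source != target:
                lengths.append(d)
    return lengths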
def draw_graph(label_flag=True, remove_isolated=True, different_size=True, iso_level=10, node_size=40):
    G = build_graph(fb.get_friends_network())
    betweenness = nx.betweenness_centrality(G)
    degree = nx.degree_centrality(G)
    degree_num = [degree[v] for v in G]
    maxdegree = max(degree_num)
    mindegree = min(degree_num)
    print maxdegree, mindegree
    clustering = nx.clustering(G)
    print nx.transitivity(G)
    # Decide whether to remove small isolated components from the graph
    if remove_isolated is True:
        H = nx.empty_graph()
        for SG in nx.connected_component_subgraphs(G):
            if SG.number_of_nodes() > iso_level:
                H = nx.union(SG, H)
        G = H
    # Adjust node sizes for better presentation
    if different_size is True:
        L = nx.degree(G)
        G.dot_size = {}
        for k, v in L.items():
            G.dot_size[k] = v
        # node_size = [betweenness[v] * 1000 for v in G]
        node_size = [G.dot_size[v] * 10 for v in G]
    node_color = [(degree[v] - mindegree) / (maxdegree - mindegree) for v in G]
    # edge_width = [getcommonfriends(u, v) for u, v in G.edges()]
    pos = nx.spring_layout(G, iterations=15)
    nx.draw_networkx_edges(G, pos, alpha=0.05)
    nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=node_color,
                           vmin=0.0, vmax=1.0, alpha=0.3)
    # Decide whether to show labels
    if label_flag is True:
        nx.draw_networkx_labels(G, pos, font_size=6, alpha=0.1)
    # nx.draw_graphviz(G)
    plt.show()
    return G
def gen_graph_stats(graph):
    G = nx.read_graphml(graph)
    stats = {}
    edges, nodes = 0, 0
    for e in G.edges_iter():
        edges += 1
    for n in G.nodes_iter():
        nodes += 1
    stats['Edges'] = (edges, 'The number of edges within the Graph')
    stats['Nodes'] = (nodes, 'The number of nodes within the Graph')
    print "%i edges, %i nodes" % (edges, nodes)
    # Accessing the highest degree node
    center, degree = sorted(G.degree().items(), key=itemgetter(1), reverse=True)[0]
    stats['Center Node'] = ('%s: %0.5f' % (center, degree),
                            'The most central node in the graph, i.e. the one with the highest degree')
    hairball = nx.subgraph(G, [x for x in nx.connected_components(G)][0])
    print "Average shortest path: %0.4f" % nx.average_shortest_path_length(hairball)
    stats['Average Shortest Path Length'] = (nx.average_shortest_path_length(hairball), '')
    # print "Center: %s" % G[center]
    # print "Shortest Path to Center: %s" % p
    print "Degree: %0.5f" % degree
    stats['Degree'] = (degree, 'The node degree is the number of edges adjacent to that node.')
    print "Order: %i" % G.number_of_nodes()
    stats['Order'] = (G.number_of_nodes(), 'The number of nodes in the graph.')
    print "Size: %i" % G.number_of_edges()
    stats['Size'] = (G.number_of_edges(), 'The number of edges in the graph.')
    print "Clustering: %0.5f" % nx.average_clustering(G)
    stats['Average Clustering'] = (nx.average_clustering(G), 'The average clustering coefficient for the graph.')
    print "Transitivity: %0.5f" % nx.transitivity(G)
    stats['Transitivity'] = (nx.transitivity(G), 'The fraction of all possible triangles present in the graph.')
    part = community.best_partition(G)
    # values = [part.get(node) for node in G.nodes()]
    # nx.draw_spring(G, cmap=plt.get_cmap('jet'), node_color=values, node_size=30, with_labels=False)
    # plt.show()
    mod = community.modularity(part, G)
    print "modularity: %0.5f" % mod
    stats['Modularity'] = (mod, 'The modularity of a partition of a graph.')
    knn = nx.k_nearest_neighbors(G)
    print knn
    stats['K Nearest Neighbors'] = (knn, 'The average degree connectivity of the graph.\n'
                                    'The average degree connectivity is the average nearest neighbor degree of '
                                    'nodes with degree k. For weighted graphs, an analogous measure can be '
                                    'computed using the weighted average neighbors degree.')
    return G, stats
def get_small_worldness(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_small_worldness.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))
        cluster = nx.average_clustering(G)
        ER_cluster = nx.average_clustering(ER_graph)
        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)
        print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' % (threshold, cluster, ER_cluster, transi, ER_transi)
        f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))
        components = nx.connected_component_subgraphs(G)
        ER_components = nx.connected_component_subgraphs(ER_graph)
        values = []
        ER_values = []
        for j in range(len(components)):
            if nx.number_of_nodes(components[j]) > 1:
                values.append(nx.average_shortest_path_length(components[j]))
        for j in range(len(ER_components)):
            if nx.number_of_nodes(ER_components[j]) > 1:
                ER_values.append(nx.average_shortest_path_length(ER_components[j]))
        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values) / len(values)))
        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values) / len(ER_values)))
        f.write("\t%f\t%f" % (transi, ER_transi))
        if (ER_cluster * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_WS = (cluster / ER_cluster) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_Delta = (transi / ER_transi) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
        else:
            S_Delta = 0.
        f.write("\t%f\t%f" % (S_WS, S_Delta))
        f.write("\n")
    f.close()
    print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
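# NOTE: `nx.connected_component_subgraphs` (used above and in several snippets
# below) was removed in NetworkX 2.4. On modern NetworkX an equivalent list of
# component subgraphs can be built from `nx.connected_components`:
def connected_component_subgraphs_compat(G):
    return [G.subgraph(c).copy() for c in nx.connected_components(G)]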
def get_small_worldness(G, thr):
    f = open(out_prfx + 'small_worldness.dat', 'a')
    g = open(out_prfx + 'cc_trans_ER.dat', 'a')
    # g.write('r(thre.)\tcc_A\tcc_ER\ttran_A\ttran_ER\n')
    ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))  # Erdos-Renyi binomial random graph G(N, p=density)
    cluster = nx.average_clustering(G)  # clustering coefficient of the whole network
    ER_cluster = nx.average_clustering(ER_graph)  # clustering coefficient of the random graph
    transi = nx.transitivity(G)
    ER_transi = nx.transitivity(ER_graph)
    g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster, ER_cluster, transi, ER_transi))
    f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster))
    components = nx.connected_component_subgraphs(G)
    ER_components = nx.connected_component_subgraphs(ER_graph)
    values = []
    ER_values = []
    for i in range(len(components)):
        if nx.number_of_nodes(components[i]) > 1:
            values.append(nx.average_shortest_path_length(components[i]))
    for i in range(len(ER_components)):
        if nx.number_of_nodes(ER_components[i]) > 1:
            ER_values.append(nx.average_shortest_path_length(ER_components[i]))
    if len(values) == 0:
        f.write("\t0.")
    else:
        f.write("\t%f" % (sum(values) / len(values)))  # path length
    if len(ER_values) == 0:
        f.write("\t0.")
    else:
        f.write("\t%f" % (sum(ER_values) / len(ER_values)))
    f.write("\t%f\t%f" % (transi, ER_transi))
    if (ER_cluster * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
        S_WS = (cluster / ER_cluster) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
    else:
        S_WS = 0.
    if (ER_transi * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
        S_Delta = (transi / ER_transi) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
    else:
        S_Delta = 0.
    f.write("\t%f\t%f" % (S_WS, S_Delta))  # S_WS ~ small-worldness
    f.write("\n")
    f.close()
    g.close()
def compute_singlevalued_measures(ntwk, weighted=True, calculate_cliques=False):
    """Returns a single value per network"""
    iflogger.info("Computing single valued measures:")
    measures = {}
    iflogger.info("...Computing degree assortativity (Pearson number)...")
    try:
        measures["degree_pearsonr"] = nx.degree_pearsonr(ntwk)
    except AttributeError:  # For NetworkX 1.6
        measures["degree_pearsonr"] = nx.degree_pearson_correlation_coefficient(ntwk)
    iflogger.info("...Computing degree assortativity...")
    try:
        measures["degree_assortativity"] = nx.degree_assortativity(ntwk)
    except AttributeError:
        measures["degree_assortativity"] = nx.degree_assortativity_coefficient(ntwk)
    iflogger.info("...Computing transitivity...")
    measures["transitivity"] = nx.transitivity(ntwk)
    iflogger.info("...Computing number of connected_components...")
    measures["number_connected_components"] = nx.number_connected_components(ntwk)
    iflogger.info("...Computing average clustering...")
    measures["average_clustering"] = nx.average_clustering(ntwk)
    if nx.is_connected(ntwk):
        iflogger.info("...Calculating average shortest path length...")
        measures["average_shortest_path_length"] = nx.average_shortest_path_length(ntwk, weighted)
    if calculate_cliques:
        iflogger.info("...Computing graph clique number...")
        measures["graph_clique_number"] = nx.graph_clique_number(ntwk)  # can hit an out-of-memory error
    return measures
def printStats(filename):
    '''
    Converts a JSON adjacency list into a networkx graph to calculate and print the graph's
    - average clustering coefficient
    - overall clustering coefficient
    - maximum diameter
    - average diameter
    - number of partitions using community.best_partition
    - modularity of community.best_partition
    '''
    g = makeGraphFromJSON(filename)
    print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
    print "Overall Clustering Coefficient: %f" % nx.transitivity(g)
    connected_subgraphs = list(nx.connected_component_subgraphs(g))
    largest = max(nx.connected_component_subgraphs(g), key=len)
    print "# Connected Components: %d" % len(connected_subgraphs)
    print "   Maximal Diameter: %d" % nx.diameter(largest)
    print "   Average Diameter: %f" % nx.average_shortest_path_length(largest)
    # Find the partition that maximizes modularity using the Louvain algorithm
    part = community.best_partition(g)
    print "# Partitions: %d" % (max(part.values()) + 1)
    print "Louvain Modularity: %f" % community.modularity(part, g)
def get_network_property(graph):
    """Returns various properties of the graph.

    It calculates the rich-club coefficient, triangles and
    transitivity coefficient. To do so, it removes self-loops
    *in-place*, so the graph passed as a parameter may be changed.
    """
    remove_self_loop(graph)
    # If the number of nodes is less than three there is
    # no point in calculating these properties.
    if len(graph.nodes()) < 3:
        return ({0: 0.0}, 0, 0)
    try:
        richness = nx.rich_club_coefficient(graph)
    except nx.NetworkXAlgorithmError:
        # NetworkXAlgorithmError is raised when it fails to achieve the
        # desired swaps after the maximum number of attempts. It happened
        # for a really small graph, but this guards against those cases.
        richness = nx.rich_club_coefficient(graph, False)
    triangle = nx.triangles(graph)
    transitivity = nx.transitivity(graph)
    return (richness, triangle, transitivity)
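# NOTE: `remove_self_loop` is an external helper not shown here. A minimal
# sketch of what it presumably does, using the stock NetworkX 2.x API
# (older versions used graph.selfloop_edges()):
def remove_self_loop(graph):
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))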
def plot_distribution(distribution_type, legend, graph, list_communities, out=None):
    x = [i for i in range(0, len(list_communities[0]))]
    for communities in list_communities:
        if distribution_type.lower() == "nodes":
            y = list(map(len, communities))
        else:
            y = []
            for l in communities:
                H = graph.subgraph(l)
                if distribution_type.lower() == "density":
                    y.append(nx.density(H))
                elif distribution_type.lower() == "transitivity":
                    y.append(nx.transitivity(H))
                else:
                    return None
        plt.plot(x, y, linewidth=2, alpha=0.8)
    # plt.yscale("log")
    plt.legend(legend, loc='upper left')
    plt.xlabel("Community ID")
    plt.ylabel(distribution_type)
    if out is None:
        plt.show()
    else:
        plt.savefig(out + ".svg", bbox_inches="tight")
        plt.close()
def connected_components(self):
    """
    Returns basic statistics about the connected components of the
    graph. This includes their number, order, size, diameter, radius,
    average clustering coefficient, transitivity, in addition to basic
    info about the largest and smallest connected components.
    """
    cc_stats = {}
    cc = nx.connected_components(self.graph.structure)
    for index, component in enumerate(cc):
        cc_stats[index] = {}
        this_cc = cc_stats[index]
        this_cc["order"] = len(component)
        this_cc["size"] = len(self.graph.structure.edges(component))
        subgraph = self.graph.structure.subgraph(component)
        this_cc["avg_cluster"] = nx.average_clustering(subgraph)
        this_cc["transitivity"] = nx.transitivity(subgraph)
        eccentricity = nx.eccentricity(subgraph)
        ecc_values = eccentricity.values()
        this_cc["diameter"] = max(ecc_values)
        this_cc["radius"] = min(ecc_values)
    return cc_stats
def get_motifs(filename):
    import networkx as nx
    from math import factorial
    threshold = 0
    f = open(filename[:-4] + '_motifs.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        tri_dict = nx.triangles(G)
        summe = 0
        for node in tri_dict:
            summe += tri_dict[node]
        N = nx.number_of_nodes(G)
        ratio = summe / (3. * binomialCoefficient(N, 3))
        transi = nx.transitivity(G)
        if transi > 0:
            triads = summe / transi
            ratio_triads = triads / (3 * binomialCoefficient(N, 3))
        else:
            triads = 0.
            ratio_triads = 0.
        print 'threshold: %f, number of triangles: %f, ratio: %f, triads: %f, ratio: %f' % (threshold, summe / 3, ratio, triads, ratio_triads)
        f.write("%f\t%d\t%f\t%f\t%f\n" % (threshold, summe / 3, ratio, triads, ratio_triads))
    f.close()
    print "1:threshold 2:#triangles 3:ratio-to-potential-triangles 4:triads 5:ratio-to-potential-triads"
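# NOTE: `binomialCoefficient` is an external helper; the `from math import
# factorial` above suggests an implementation along these lines (a sketch,
# not necessarily the original):
def binomialCoefficient(n, k):
    return factorial(n) / (factorial(k) * factorial(n - k))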
def netstats_simple(graph):
    G = graph
    if nx.is_connected(G):
        d = nx.diameter(G)
        r = nx.radius(G)
    else:
        # should be computable on an unconnected graph (per component) - see example code for a hack
        d = 'NA - graph is not connected'
        r = 'NA - graph is not connected'
    # use a dictionary to pack values and variables
    result = {
        # single value measures
        'nn': G.number_of_nodes(),
        'ne': G.number_of_edges(),
        'd': d,
        'r': r,
        'conn': nx.number_connected_components(G),
        'asp': nx.average_shortest_path_length(G),
        # size of the largest clique
        'cn': nx.graph_clique_number(G),
        # number of maximal cliques
        'mcn': nx.graph_number_of_cliques(G),
        # transitivity
        'tr': nx.transitivity(G),
        # average clustering coefficient (cc = nx.clustering(G) for per-node values)
        'avgcc': nx.average_clustering(G)
    }
    # result['d'] = nx.diameter(G)
    print result
    return result
def degree_statistics(G):
    n_nodes = G.number_of_nodes()
    start = time.clock()
    # list of sampled graphs
    g_list[:] = []
    for i in range(N_SAMPLES):
        g_list.append(generate_sample(G))
    print "Sampling graphs - Elapsed ", (time.clock() - start)
    #####
    # number of edges s_NE
    s_NE = sum(e[2]['p'] for e in G.edges_iter(data=True))
    # average degree s_AD
    s_AD = 2 * s_NE / n_nodes
    # maximal degree s_MD
    sum_MD = 0.0
    for aG in g_list:
        max_deg = max(aG.degree().itervalues())
        sum_MD += max_deg
    s_MD = sum_MD / N_SAMPLES
    # degree variance s_DV
    sum_DV = 0.0
    for aG in g_list:
        deg_var = 1.0 / n_nodes * sum((d - s_AD) * (d - s_AD) for d in aG.degree().itervalues())
        sum_DV += deg_var
    s_DV = sum_DV / N_SAMPLES
    # clustering coefficient s_CC
    sum_CC = 0.0
    for aG in g_list:
        cc = nx.transitivity(aG)
        sum_CC += cc
    s_CC = sum_CC / N_SAMPLES
    # degree distribution
    deg_list = [0 for i in range(MAX_DEG)]
    for aG in g_list:
        for d in aG.degree().itervalues():
            deg_list[d] += 1
    i = MAX_DEG - 1
    while deg_list[i] == 0:
        i = i - 1
    deg_list = deg_list[:i + 1]
    print "len(deg_list) =", len(deg_list)
    print deg_list
    for i in range(len(deg_list)):
        deg_list[i] = float(deg_list[i]) / N_SAMPLES
    #
    return s_NE, s_AD, s_MD, s_DV, s_CC, deg_list
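# NOTE: `generate_sample` is an external helper. Given that edges carry a
# probability attribute 'p' (see the expected-edge count above), a plausible
# sketch samples each edge independently with its probability; it assumes
# `import random` and the same NetworkX 1.x API as the snippet. The original
# may differ:
def generate_sample(G):
    aG = nx.Graph()
    aG.add_nodes_from(G.nodes_iter())
    for u, v, data in G.edges_iter(data=True):
        if random.random() < data['p']:
            aG.add_edge(u, v)
    return aG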
def cluster():
    if created == 0:
        print 'No graph created!'
    elif created == 1:
        try:
            print 'The clustering coefficient for the whole graph is %0.4f.' % (nx.transitivity(G))
        except nx.NetworkXError, e:
            print e
def preferentialAttachment(G):
    n = G.number_of_nodes()
    m = random.randrange(15, 20)
    PG = nx.barabasi_albert_graph(n, m)
    plot(PG)
    # theoretical average path length for a Barabasi-Albert graph: ln(n)/ln(ln(n))
    l = math.log(n) / math.log(math.log(n))
    print 'Global Clustering: {0}\t'.format(str(nx.transitivity(PG))),
    print 'Average path length : {0}\n'.format(str(l))
def get_my_small_worldness(filename):
    threshold = 0
    f = open(filename[:-4] + '_small_worldness.dat', 'w')
    f.write('thresh\t\taver_clus\t\tave_ER_clus\t\tcoup_coeff\t\tchar_path\t\ttransi\t\tER_transi\t\tS_WS\t\tS_delta\n')
    print f
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_my_threshold_matrix(filename, threshold)
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))  # random graph
        cluster = nx.average_clustering(G)
        ER_cluster = nx.average_clustering(ER_graph)
        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)
        f.write('%f\t%f\t%f' % (threshold, cluster, ER_cluster))
        components = nx.connected_component_subgraphs(G)
        ER_components = nx.connected_component_subgraphs(ER_graph)
        values = []
        ER_values = []
        for j in range(len(components)):
            if nx.number_of_nodes(components[j]) > 1:
                values.append(nx.average_shortest_path_length(components[j]))
        for j in range(len(ER_components)):
            if nx.number_of_nodes(ER_components[j]) > 1:
                ER_values.append(nx.average_shortest_path_length(ER_components[j]))
        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values) / len(values)))
        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values) / len(ER_values)))
        f.write("\t%f\t%f" % (transi, ER_transi))
        if (ER_cluster * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_WS = (cluster / ER_cluster) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_Delta = (transi / ER_transi) / ((sum(values) / len(values)) / (sum(ER_values) / len(ER_values)))
        else:
            S_Delta = 0.
        f.write("\t%f\t%f" % (S_WS, S_Delta))
        f.write("\n")
    f.close()
def test_clustering_transitivity(self):
    # check that the weighted average of clustering is transitivity
    G = nx.complete_graph(5)
    G.remove_edge(1, 2)
    t1 = nx.transitivity(G)
    (cluster_d2, weights) = nx.clustering(G, weights=True)
    trans = []
    for v in G.nodes():
        trans.append(cluster_d2[v] * weights[v])
    t2 = sum(trans)
    assert_almost_equal(abs(t1 - t2), 0)
def myglobalclust(corr):
    if not isinstance(corr, nx.DiGraph):
        return [nx.transitivity(corr)]  # 3 * triangles / triads
    # directed case: ratio of closed directed triangles to directed 2-paths
    corr = np.array(nx.to_numpy_matrix(corr))
    mat = np.dot(corr, corr)
    paths = np.sum(mat) - np.trace(mat)  # directed 2-paths with distinct endpoints
    mat = np.dot(mat, corr)
    loops = np.trace(mat)  # trace(A^3) = 3 x number of directed 3-cycles
    if paths == 0:
        return [0]
    else:
        return [float(loops) / paths]
def calGraph(infile, mode=1):
    # init parameters
    inputpath = 'edge_list/'
    n = mode
    Data_G = inputpath + infile + '_' + str(n) + '.edgelist'
    # init graph
    G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
    GU = nx.read_edgelist(Data_G)
    average_clustering = nx.average_clustering(GU)
    transitivity = nx.transitivity(G)
    return [average_clustering, transitivity]
def graphAnalysis(graph, top_number, save_file_path):
    """
    Run the essential analyses on the final combined graph
    """
    with io.open(save_file_path, 'w') as save_file:
        # centrality
        # degree centrality
        deg_central = nx.degree_centrality(graph)
        deg_central_sort = sorted(deg_central.items(), key=lambda x: x[1], reverse=True)
        top_deg_central_sort = deg_central_sort[:top_number]
        save_file.write('top %d degree centrality items,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_deg_central_sort))

        # clustering
        # number of triangles: triangles() is not defined for directed graphs
        triangle_num = nx.triangles(graph)
        triangle_num_sort = sorted(triangle_num.items(), key=lambda x: x[1], reverse=True)
        top_triangle_num_sort = triangle_num_sort[:top_number]
        save_file.write('\ntop %d number of triangles including a node as one vertex,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_triangle_num_sort))

        # clustering coefficient of each node in the graph
        cluster_coefficient = nx.clustering(graph)
        cluster_coefficient_sort = sorted(cluster_coefficient.items(), key=lambda x: x[1], reverse=True)
        top_cluster_coefficient_sort = cluster_coefficient_sort[:top_number]
        save_file.write('\ntop %d clustering coefficient items,' % top_number)
        save_file.write(','.join('%s %s' % x for x in top_cluster_coefficient_sort))

        # transitivity of the graph
        triangle_transitivity = nx.transitivity(graph)
        save_file.write('\ntransitivity of the graph,%f' % triangle_transitivity)

        # average clustering coefficient of the graph
        avg_cluster = nx.average_clustering(graph)
        save_file.write('\naverage clustering coefficient of the graph,%f' % avg_cluster)

        # clique
        # size of the largest clique in the graph
        size_largest_clique = nx.graph_clique_number(graph)
        save_file.write('\nsize of the largest clique in the graph,%d' % size_largest_clique)

        # all the cliques in the graph
        all_clique = nx.find_cliques(graph)  # a generator
        list_all_clique = list(all_clique)
        list_all_clique_sort = sorted(list_all_clique, key=lambda x: len(x), reverse=True)
        list_all_clique_sort = [' '.join(clique) for clique in list_all_clique_sort]
        # print list_all_clique_sort
        save_file.write('\ncliques,')
        save_file.write(','.join(x for x in list_all_clique_sort))
def ClusteringCoefficientCentralityExperiment(G, min_target, max_target, filename):
    print nx.info(G)
    print 'Global Clustering Coefficient:', nx.transitivity(G.to_undirected())
    X_Clustering_Coefficient = []
    Y_nD = []
    target = min_target
    while target <= max_target:
        copyG = G.copy()
        new_G = SimulatedAnnealing(copyG, target, test_cost_function)
        clustering_coeff = nx.transitivity(new_G.to_undirected())
        nD = SCT.controllability(new_G)
        X_Clustering_Coefficient.append(clustering_coeff)
        Y_nD.append(nD)
        print "target = ", target, " CC = ", clustering_coeff, 'nD = ', nD
        target += 0.05
    s = 'results/' + filename
    with open(s, "w") as f:
        for i in range(len(Y_nD)):
            print >> f, "%f %f" % (X_Clustering_Coefficient[i], Y_nD[i])
    return (X_Clustering_Coefficient, Y_nD)
def compute_singlevalued_measures(ntwk, weighted=True, calculate_cliques=False):
    """Returns a single value per network"""
    iflogger.info('Computing single valued measures:')
    measures = {}
    iflogger.info('...Computing degree assortativity (Pearson number)...')
    try:
        measures['degree_pearsonr'] = nx.degree_pearsonr(ntwk)
    except AttributeError:  # For NetworkX 1.6
        measures['degree_pearsonr'] = nx.degree_pearson_correlation_coefficient(ntwk)
    iflogger.info('...Computing degree assortativity...')
    try:
        measures['degree_assortativity'] = nx.degree_assortativity(ntwk)
    except AttributeError:
        measures['degree_assortativity'] = nx.degree_assortativity_coefficient(ntwk)
    iflogger.info('...Computing transitivity...')
    measures['transitivity'] = nx.transitivity(ntwk)
    iflogger.info('...Computing number of connected_components...')
    measures['number_connected_components'] = nx.number_connected_components(ntwk)
    iflogger.info('...Computing graph density...')
    measures['graph_density'] = nx.density(ntwk)
    iflogger.info('...Recording number of edges...')
    measures['number_of_edges'] = nx.number_of_edges(ntwk)
    iflogger.info('...Recording number of nodes...')
    measures['number_of_nodes'] = nx.number_of_nodes(ntwk)
    iflogger.info('...Computing average clustering...')
    measures['average_clustering'] = nx.average_clustering(ntwk)
    iflogger.info('...Calculating average shortest path length...')
    if nx.is_connected(ntwk):
        measures['average_shortest_path_length'] = nx.average_shortest_path_length(ntwk, weighted)
    else:
        # fall back to the first connected component
        measures['average_shortest_path_length'] = nx.average_shortest_path_length(
            nx.connected_component_subgraphs(ntwk)[0], weighted)
    if calculate_cliques:
        iflogger.info('...Computing graph clique number...')
        measures['graph_clique_number'] = nx.graph_clique_number(ntwk)  # can hit an out-of-memory error
    return measures
def main(filename):
    G, UG = loadEdgeList(filename)
    print 'Original Graph'
    print '--------------\n'
    directedgraph_plot(G)
    print 'Global Clustering: {0}\t'.format(str(nx.transitivity(G))),
    print 'Average path length : {0}\n'.format(str(nx.average_shortest_path_length(UG)))
    print '\nRandom Graph Model'
    print '------------------\n'
    randomGraph(G)
    print '\nSmall World Model'
    print '-----------------\n'
    smallworld(G)
    print '\nPreferential Attachment Model'
    print '------------------------------\n'
    preferentialAttachment(G)
def get_network_statistics(G):
    '''
    Compute key network statistics for the graph G
    '''
    size = len(G)
    density = nx.density(G)
    # diameter = nx.diameter(G)
    clustering = nx.average_clustering(G.to_undirected())
    transitivity = nx.transitivity(G.to_undirected())
    grc = global_reaching_centrality(G)
    return {"size": size,
            "density": density,
            # "diameter": diameter,
            "clustering": clustering,
            "transitivity": transitivity,
            "grc": grc}
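# NOTE: `global_reaching_centrality` is not defined in this snippet. NetworkX
# ships an implementation of this measure, so the helper may simply wrap it
# (an assumption about the original):
def global_reaching_centrality(G):
    return nx.global_reaching_centrality(G)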
def test_networkx(file_name):
    start = time.clock()
    g = nx.read_edgelist("../data/" + file_name + ".gr", '#', '\t', None, nodetype=int, data=False)
    print "elapsed ", time.clock() - start
    print "#nodes =", g.number_of_nodes()
    print "#edges =", g.number_of_edges()
    deg_list = nx.degree(g)
    max_deg = max(deg_list.itervalues())
    print "max_deg =", max_deg
    #
    start = time.clock()
    clustering_coeff = nx.transitivity(g)
    print "clustering_coeff =", clustering_coeff
    print "elapsed ", time.clock() - start
def subgraphProperty(H):
    nnodes = nx.number_of_nodes(H)
    if nnodes < 2:
        return (2, 0, 0, 0, 0)
    nedges = nx.number_of_edges(H)
    dens = edge_density(nedges, nnodes)
    average_score = 0
    score = 0
    for (u, v, d) in H.edges(data=True):
        score += d['score']
    # average over all possible edges
    # average_score = 2 * score / (nnodes * (nnodes - 1))
    # average over all present edges
    average_score = score / nedges
    trans = nx.transitivity(H)
    return (nnodes, nedges, dens, average_score, trans)
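# NOTE: `edge_density` is an external helper used here and in the regulon
# script below. A sketch of the usual definition for an undirected simple
# graph (2E / (N * (N - 1))); the original may differ:
def edge_density(nedges, nnodes):
    return 2.0 * nedges / (nnodes * (nnodes - 1))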
def get_motifs(G, thr):
    f = open(out_prfx + 'motifs.dat', 'a')
    tri_dict = nx.triangles(G)  # number of triangles around each node in G
    summe = 0
    for node in tri_dict:
        summe += tri_dict[node]  # sum up the triangle counts over nodes
    N = nx.number_of_nodes(G)
    ratio = summe / (3. * binomialCoefficient(N, 3))  # ratio to potential triangles
    transi = nx.transitivity(G)
    if transi > 0:
        triads = summe / transi  # triads
        ratio_triads = triads / (3 * binomialCoefficient(N, 3))  # ratio to potential triads
    else:
        triads = 0.
        ratio_triads = 0.
    f.write("%f\t%d\t%f\t%f\t%f\n" % (thr, summe / 3, ratio, triads, ratio_triads))
    f.close()
def save_network_statistics(g):
    stats = {}
    stats['num_weakly_connected_components'] = nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:
        stats['avg_clustering_coef'] = None  # not defined for directed graphs
    stats['avg_degree'] = sum(g.degree().values()) / float(stats['num_nodes'])
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        stats['diameter'] = None  # unconnected --> infinite path length between connected components
    with open('./network-statistics/twitter-combined-statistics.txt', 'wb') as f:
        for stat_name, stat_value in stats.iteritems():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def compute_local_clustering(genelist, T250, commGraphs):
    '''
    First collects, for each community subgraph in commGraphs, the genes
    from genelist that fall inside it; then computes the transitivity of
    each induced subgraph and averages over the non-empty communities.
    '''
    vertexList = {k: [] for k in range(0, len(commGraphs.keys()))}
    for comm in commGraphs:
        for gene in genelist:
            if gene in commGraphs[comm].nodes():
                vertexList[comm].append(gene)
    k = 0
    delta = 0
    for comm in vertexList.keys():
        if len(vertexList[comm]) > 0:
            delta += NX.transitivity(NX.subgraph(commGraphs[comm], vertexList[comm]))
            k += 1
    return delta / float(k)
def loadEdgeList(anomymizedEdges):
    G = nx.DiGraph()
    UG = nx.Graph()
    with open(anomymizedEdges, 'rb') as file:
        content = csv.reader(file)
        for row in content:
            G.add_edge(row[0], row[1])
    UG = G.to_undirected()
    print 'Average Local Clustering : {0}\n'.format(str(nx.average_clustering(UG)))
    print 'Global Clustering: {0}\n'.format(str(nx.transitivity(G)))
    print 'Page Rank Centrality:'
    pageRank = sorted(nx.pagerank_numpy(G).items(), key=lambda x: x[1])
    for i in pageRank[-10:]:
        print '{0} {1}'.format(i[0], i[1])
    print ''
    print 'Eigenvector Centrality:'
    eigenVector = sorted(nx.centrality.eigenvector_centrality(G).items(), key=lambda x: x[1])
    for i in eigenVector[-10:]:
        print '{0} {1}'.format(i[0], i[1])
    print ''
    print 'Degree Centrality:'
    degreeCentrality = sorted(nx.centrality.in_degree_centrality(G).items(), key=lambda x: x[1])
    for i in degreeCentrality[-10:]:
        print '{0} {1}'.format(i[0], i[1])
    print ''
    print 'Rank correlation between Pagerank Centrality and Eigenvector Centrality: ', st.spearmanr([i[1] for i in pageRank], [i[1] for i in eigenVector])[0]
    print 'Rank correlation between Pagerank Centrality and Degree Centrality: ', st.spearmanr([i[1] for i in pageRank], [i[1] for i in degreeCentrality])[0]
    print 'Rank correlation between Degree Centrality and Eigenvector Centrality: ', st.spearmanr([i[1] for i in degreeCentrality], [i[1] for i in eigenVector])[0]
    js = max([i for i in nx.algorithms.link_prediction.jaccard_coefficient(UG)], key=lambda x: x[2])
    print '\nNodes with max Jaccard Similarity : {0} {1}\n'.format(str(js[0]), str(js[1]))
    return G, UG
def degree_statistics_one(G):
    n_nodes = G.number_of_nodes()
    #####
    # number of edges s_NE
    s_NE = G.number_of_edges()
    # average degree s_AD
    s_AD = 2 * float(s_NE) / n_nodes
    # maximal degree s_MD
    s_MD = max(G.degree().itervalues())
    # degree variance s_DV
    s_DV = 1.0 / n_nodes * sum((d - s_AD) * (d - s_AD) for d in G.degree().itervalues())
    # clustering coefficient s_CC
    s_CC = nx.transitivity(G)
    # degree distribution --> HISTOGRAM count
    # deg_list = [0 for i in range(MAX_DEG)]
    # for d in G.degree().itervalues():
    #     deg_list[d] += 1
    #
    # i = MAX_DEG-1
    # while deg_list[i] == 0:
    #     i = i-1
    # deg_list = deg_list[:i+1]
    # print "len(deg_list) =", len(deg_list)
    # print deg_list
    # degree distribution --> keep it as a (multi)set
    deg_list = list(G.degree().itervalues())
    #
    return s_NE, s_AD, s_MD, s_DV, s_CC, deg_list
def analyze_graph(graph):
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.triangles.html
    # Triangles per node; we analyse the average per graph
    triangles = np.average(list(nx.triangles(graph).values()))
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.transitivity.html
    transitivity = nx.transitivity(graph)
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.clustering.html
    # clustering = nx.clustering(graph, weight='weight').values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html
    average_clustering = nx.average_clustering(graph, weight='weight', count_zeros=False)
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.bipartite.centrality.closeness_centrality.html
    closeness = nx.closeness_centrality(graph).values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.bipartite.centrality.betweenness_centrality.html
    betweenness = nx.betweenness_centrality(graph).values()
    # https://networkx.github.io/documentation/latest/reference/algorithms/generated/networkx.algorithms.assortativity.degree_assortativity_coefficient.html
    homophily = nx.degree_assortativity_coefficient(graph, weight='weight')
    # https://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.algorithms.assortativity.attribute_assortativity_coefficient.html
    # Homophily by citations
    homophily_citations = nx.attribute_assortativity_coefficient(graph, 'citations')
    # Homophily by university
    homophily_university = nx.attribute_assortativity_coefficient(graph, 'university')
    return {
        'triangles': np.round(triangles, 2),
        'transitivity': transitivity,
        # 'clustering': clustering,
        'average_clustering': average_clustering,
        'closeness': list(closeness),
        'betweenness': list(betweenness),
        'homophily': homophily,
        'homophily_citations': homophily_citations,
        'homophily_university': homophily_university
    }
def create_graph(red_G):
    pos = nx.spring_layout(red_G)
    nx.draw_networkx_nodes(red_G, pos, node_color='black', node_size=30)
    nx.draw_networkx_edges(red_G, pos, edge_color='purple')
    nx.draw_networkx_labels(red_G, pos, font_size=10, font_family='Arial')
    plt.axis('off')
    plt.show()
    # most central words
    centr_words = []
    deg_centr_words = nx.degree_centrality(red_G)
    for nodeid in sorted(deg_centr_words, key=deg_centr_words.get, reverse=True):
        centr_words.append(nodeid)
    print('The most central words of the graph: ' + str(centr_words[0]) + ',' + str(centr_words[1]) + '.')
    # graph radius
    print('Graph radius: ' + str(nx.radius(red_G)))
    # clustering coefficient
    print('Clustering coefficient: ' + str(nx.average_clustering(red_G)))
    print(nx.transitivity(red_G))
opr_nodes = operon_nodes(crs_f, similarity_cutoff, zscore_cutoff)
# pprint(opr_nodes)
G = motif_graph(crs_f, similarity_cutoff, zscore_cutoff)
loo_f = "../../data/LOO_per_matrix_site.tsv"
regulon_f = "../../data/regulon_by_first_gene.txt"
LOO = read_LOO(loo_f)
regulon = read_regulon(regulon_f)
print("#graph has %d nodes with %d edges, edges to nodes ratio: %f, edge average zscore: %f, %f transitivity"
      % (nx.number_of_nodes(G), nx.number_of_edges(G),
         edge_density(nx.number_of_edges(G), nx.number_of_nodes(G)),
         edge_average_zscore(G), nx.transitivity(G)))
print("#", nx.number_connected_components(G), "connected components")
print("reg\tLOO\tsize\tnodes\tedges\tratio\tavg_zscore\tnumber_connected_components\tlargest_comp_size\ttransitivity")
for reg in regulon.keys():
    if len(regulon[reg]) > 2:
        nodes = list()
        for gi in regulon[reg]:
            for n in opr_nodes[gi]:
                nodes.append(n)
        H = G.subgraph(nodes)
        if nx.number_of_nodes(H) < 2:
def test_cubical(self):
    G = nx.cubical_graph()
    assert nx.transitivity(G) == 0.0
def test_transitivity(self):
    G = nx.Graph()
    assert nx.transitivity(G) == 0.0
def sigma(G, niter=100, nrand=10, seed=None):
    """Returns the small-world coefficient (sigma) of the given graph.

    The small-world coefficient is defined as:

        sigma = (C / Cr) / (L / Lr)

    where C and L are respectively the average clustering coefficient and
    average shortest path length of G. Cr and Lr are respectively the average
    clustering coefficient and average shortest path length of an equivalent
    random graph.

    A graph is commonly classified as small-world if sigma > 1.

    Parameters
    ----------
    G : NetworkX graph
        An undirected graph.
    niter : integer (optional, default=100)
        Approximate number of rewirings per edge to compute the equivalent
        random graph.
    nrand : integer (optional, default=10)
        Number of random graphs generated to compute the average clustering
        coefficient (Cr) and average shortest path length (Lr).
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    sigma : float
        The small-world coefficient of G.

    Notes
    -----
    The implementation is adapted from Humphries et al. [1]_ [2]_.

    References
    ----------
    .. [1] The brainstem reticular formation is a small-world, not scale-free,
       network. M. D. Humphries, K. Gurney and T. J. Prescott,
       Proc. Roy. Soc. B 2006 273, 503-511, doi:10.1098/rspb.2005.3354.
    .. [2] Humphries and Gurney (2008).
       "Network 'Small-World-Ness': A Quantitative Method for Determining
       Canonical Network Equivalence".
       PLoS One. 3 (4). PMID 18446219. doi:10.1371/journal.pone.0002051.
    """
    import numpy as np

    # Compute the mean clustering coefficient and average shortest path length
    # for an equivalent random graph
    randMetrics = {"C": [], "L": []}
    for i in range(nrand):
        Gr = random_reference(G, niter=niter, seed=seed)
        randMetrics["C"].append(nx.transitivity(Gr))
        randMetrics["L"].append(nx.average_shortest_path_length(Gr))

    C = nx.transitivity(G)
    L = nx.average_shortest_path_length(G)
    Cr = np.mean(randMetrics["C"])
    Lr = np.mean(randMetrics["L"])

    sigma = (C / Cr) / (L / Lr)

    return sigma
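# Usage sketch for `sigma` (assumes `random_reference` from
# networkx.algorithms.smallworld is importable alongside it). A Watts-Strogatz
# graph should give sigma noticeably above 1; the parameters are kept small
# here because the rewiring step is expensive:
if __name__ == "__main__":
    G = nx.watts_strogatz_graph(60, 6, 0.1, seed=42)
    print(sigma(G, niter=5, nrand=3, seed=42))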
path = '../data/karate/karate.gml'
nodes, edges = gml_data.load_gml_data(path)
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)
# print node info
print(G.nodes(data=True))
# print edge info
print(G.edges)
# compute the transitivity of the graph/network
print(nx.transitivity(G))
# number of nodes
print(G.number_of_nodes())
# number of edges
print(G.number_of_edges())
# neighbors of a node
print(G.neighbors(1))

import igraph
g = igraph.Graph([(0, 1), (0, 2), (2, 3), (3, 4), (4, 2), (2, 5), (5, 0), (6, 3), (5, 6)])
igraph.plot(g, target="/tmp/igraph_demo.png")
    degree_cen = nx.degree_centrality(G_r[repo])
    cens[repo] = (max(cns) * len(G_r[repo]) - sum(cns)) / (len(G_r[repo]) - 2) if len(G_r[repo]) > 2 else 0
    sizes[repo] = len(G_r[repo].nodes)

if not nx.is_connected(G):
    # print(tm)
    pass
    # continue
degree_cen = nx.degree_centrality(G).items()
degree_cen = sorted(degree_cen, key=lambda x: x[1])
nodes = [d_c[0] for d_c in degree_cen]
cns = [d_c[1] for d_c in degree_cen]
centers['all'] = nodes[cns.index(cns[-1]):]
aspls['all'] = nx.average_shortest_path_length(G) if nx.is_connected(G) else -1
acs['all'] = nx.transitivity(G) if nx.is_connected(G) else -1
cens['all'] = (cns[-1] * len(G) - sum(cns)) / (len(G) - 2) if len(G) > 2 else 0
team_centers.append(centers)
team_acs.append(acs)
team_aspls.append(aspls)
team_cens.append(cens)
team_sizes.append(sizes)
# print(len(team_cens))

print("Computing Existing Duration...")
network_time = {}
with open(time_filename) as tf:
    for line in tf.readlines():
        m1, m2, t1, t2 = line.strip().split('\t')
        if m1 in network_time:
print(kmin_mul)
print(kmean_mul)
print(avDegree_mul)
"""

Results.write("k max\t%.2f\t%.2f\t%.2f\n" % (kmax_lit, kmax_bin, kmax_mul))
Results.write("k min\t%.2f\t%.2f\t%.2f\n" % (kmin_lit, kmin_bin, kmin_mul))

densidad_bin = nx.density(Gbin)
densidad_lit = nx.density(Glit)
densidad_mul = nx.density(Gmul)
Results.write("density\t%.4f\t%.4f\t%.4f\n" % (densidad_lit, densidad_bin, densidad_mul))

c_global_lit = nx.transitivity(Glit)
c_global_bin = nx.transitivity(Gbin)
c_global_mul = nx.transitivity(Gmul)
ci_lit = nx.average_clustering(Glit)
ci_bin = nx.average_clustering(Gbin)
ci_mul = nx.average_clustering(Gmul)
Results.write("C glob.\t%.4f\t%.4f\t%.4f\n" % (c_global_lit, c_global_bin, c_global_mul))
Results.write("C_i\t\t%.4f\t%.4f\t%.4f\n" % (ci_lit, ci_bin, ci_mul))

####### lit
componentes_lit = nx.connected_component_subgraphs(Glit)
diametros_lit = []
for c in componentes_lit:
def printGraphProperty(G, score_name):
    total_nodes = G.number_of_nodes()
    total_edges = G.number_of_edges()
    trans = nx.transitivity(G)
    print("score\tnodes\tedges\ttransitivity")
    print("%s\t%d\t%d\t%.3f" % (score_name, total_nodes, total_edges, trans))
node_size=5, label="Students") plt.legend(loc="lower right", facecolor='grey') nx.draw_networkx_edges(P, pos=nx.random_layout(P), with_labels=False, edge_color="red", width=0.03, label="Student-Student") plt.title("Projected bipartite graph of students", size=20) ax2 = plt.axes() ax2.set_facecolor("lavender") plt.show() clustering = nx.average_clustering(P) transitivity = nx.transitivity(P) print("\n\n\nclustering:\n", clustering, file=f) print("\n\n\ntransitivity:\n", transitivity, file=f) degCent = nx.degree_centrality(G) degCent_classes = dict( (key, value) for key, value in degCent.items() if not re.match("\d+", key)) sorted_degCent = sorted(degCent_classes.items(), key=operator.itemgetter(1), reverse=True) # # print(sorted_degCent) # closeCent = nx.closeness_centrality(G, wf_improved=True) closeCent_classes = dict((key, value) for key, value in closeCent.items() if not re.match("\d+", key)) sorted_closeCent = sorted(closeCent_classes.items(),
def get_transitivity(self):
    try:
        return nx.transitivity(self.graph)
    except Exception, e:
        traceback.print_exc()
def print_basic_graph_properties(G, file_path="graph/graph_properties.txt"):
    output_string = ""
    if type(G) != nx.classes.digraph.DiGraph:
        raise Exception("NetworkX directed graph expected")
    output_string += " Type of object " + str(type(G)) + "\n"
    output_string += " It has " + str(len(G.nodes())) + " nodes and " + \
        str(len(G.edges())) + " edges \n"
    pathlengths = []
    output_string += "source vertex {target: length, } for some nodes\n"
    count = 0
    for v in G.nodes():
        # Compute the shortest path lengths from source to all reachable nodes
        spl = nx.single_source_shortest_path_length(G, v)
        count += 1
        if count < 20:
            output_string += '%s %s' % (v, spl)
            output_string += "\n"
        for p in spl.values():
            pathlengths.append(p)
    # histogram of path lengths
    histogram_graph(pathlengths, "Distribution of shortest path lengths",
                    oyellow, "images/pathlengths_distribution.png")
    output_string += " \n"
    output_string += " ****** average shortest path length %s" % (
        sum(pathlengths) / len(pathlengths)) + " \n"
    # Strongly connected components
    is_wk_connected = nx.is_weakly_connected(G)
    output_string += " Is the graph strongly connected? -> " + str(
        nx.is_strongly_connected(G)) + " \n"
    n = nx.number_strongly_connected_components(G)
    output_string += "It has " + str(n) + " strongly connected components \n"
    # time consuming
    largest = max(nx.strongly_connected_component_subgraphs(G), key=len)
    output_string += "the largest strongly connected component has " + str(
        len(largest)) + " nodes, which are a " + str(
        len(largest) / len(G) * 100) + "% of total nodes \n"
    output_string += "for the largest component, the descriptive measures are: \n"
    output_string += basic_measures(largest)
    # Weakly connected components
    output_string += " Is the graph weakly connected? -> " + str(
        nx.is_weakly_connected(G)) + " \n"
    n = nx.number_weakly_connected_components(G)
    output_string += "It has " + str(n) + " weakly connected components \n"
    # time consuming
    largest = max(nx.weakly_connected_component_subgraphs(G), key=len)
    output_string += "the largest weakly connected component has " + str(
        len(largest)) + " nodes, which are a " + str(
        len(largest) / len(G) * 100) + "% of total nodes \n"
    degree_sequence = [d for n, d in G.degree()]
    histogram_graph(degree_sequence, "Degree distribution", oyellow,
                    "images/degree_distribution.png")
    degree_sequence = [d for n, d in G.in_degree()]
    histogram_graph(degree_sequence, "In-degree distribution", oyellow,
                    "images/indegree_distribution.png")
    degree_sequence = [d for n, d in G.out_degree()]
    histogram_graph(degree_sequence, "Out-degree distribution", oyellow,
                    "images/outdegree_distribution.png")
    output_string += "The transitivity coefficient of the graph is " + str(
        nx.transitivity(G))
    print(output_string)
    f = open(file_path, 'w')
    f.write(output_string)
    f.close()
    return
for grado in deg.values():
    a.append(grado)
print("iv.2) the maximum degree is", max(a))
print("iv.3) the minimum degree is", min(a))

## Density
print("v) the density of network 1 is", nx.density(G1))
print("v) the density of network 2 is", nx.density(G2))
print("v) the density of network 3 is", nx.density(G3))

# Average clustering coefficient
CC1 = nx.average_clustering(G1)
print("the clustering coefficient <C> of the network is", CC1)
CC2 = nx.average_clustering(G2)
print("the clustering coefficient <C> of the network is", CC2)
CC3 = nx.average_clustering(G3)
print("the clustering coefficient <C> of the network is", CC3)

# Global clustering coefficient
CCG1 = nx.transitivity(G1)
print("the global clustering coefficient of network 1 is", CCG1)
CCG2 = nx.transitivity(G2)
print("the global clustering coefficient of network 2 is", CCG2)
CCG3 = nx.transitivity(G3)
print("the global clustering coefficient of network 3 is", CCG3)
def spearman(G_times, anomaly_ranks, directed, window, initial_period, plot=False):
    max_time = len(G_times)
    t = list(range(0, max_time))
    avg_weight = []
    total_edges = []
    avg_clustering = []
    avg_degree = []
    transitivity = []
    if directed:
        num_strong = []
        num_weak = []
    else:
        num_connected_components = []

    for G in G_times:
        weights = list(nx.get_edge_attributes(G, 'weight').values())
        degrees = list(G.degree)
        sum_degree = 0
        for (v, d) in degrees:
            sum_degree = sum_degree + d
        total_edges.append(G.number_of_edges())
        avg_degree.append(sum_degree / len(degrees))
        if len(weights) > 0:
            avg_weight.append(sum(weights) / len(weights))
        avg_clustering.append(nx.average_clustering(G))
        transitivity.append(nx.transitivity(G))
        if directed:
            num_strong.append(nx.number_strongly_connected_components(G))
            num_weak.append(nx.number_weakly_connected_components(G))
        else:
            num_connected_components.append(nx.number_connected_components(G))

    if len(avg_weight) > 0:
        ranks = rank_outliers(avg_weight, window=window, initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if plot:
            normal_util.plot_ranks(anomaly_ranks, ranks, "avg_weight")
        print("spearman rank correlation with avg edge weight is " + str(corr))
        print("p-test with avg edge weight is " + str(p_test))
        print()

    ranks = rank_outliers(avg_clustering, window=window, initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    # if plot:
    #     normal_util.plot_ranks(anomaly_ranks, ranks, "avg_clustering")
    # print("spearman rank correlation with avg clustering coefficient is " + str(corr))
    # print("p-test with avg clustering coefficient is " + str(p_test))
    # print()

    if directed:
        ranks = rank_outliers(num_weak, window=window, initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if plot:
            normal_util.plot_ranks(anomaly_ranks, ranks, "weak_connected")
        print("spearman rank correlation with number of weakly connected components is " + str(corr))
        print("p-test with number of weakly connected components is " + str(p_test))
        print()

        ranks = rank_outliers(num_strong, window=window, initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if plot:
            normal_util.plot_ranks(anomaly_ranks, ranks, "strong_connected")
        print("spearman rank correlation with number of strongly connected components is " + str(corr))
        print("p-test with number of strongly connected components is " + str(p_test))
        print()
    else:
        ranks = rank_outliers(num_connected_components, window=window, initial_period=initial_period)
        (corr, p_test) = spearmanr(anomaly_ranks, ranks)
        if plot:
            normal_util.plot_ranks(anomaly_ranks, ranks, "num_connected")
        print("spearman rank correlation with number of connected components is " + str(corr))
        print("p-test with number of connected components is " + str(p_test))
        print()

    ranks = rank_outliers(transitivity, window=window, initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if plot:
        normal_util.plot_ranks(anomaly_ranks, ranks, "transitivity")
    print("spearman rank correlation with transitivity is " + str(corr))
    print("p-test with transitivity is " + str(p_test))
    print()

    ranks = rank_outliers(total_edges, window=window, initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if plot:
        normal_util.plot_ranks(anomaly_ranks, ranks, "num_edges")
    print("spearman rank correlation with total number of edges is " + str(corr))
    print("p-test with total number of edges is " + str(p_test))
    print()

    ranks = rank_outliers(avg_degree, window=window, initial_period=initial_period)
    (corr, p_test) = spearmanr(anomaly_ranks, ranks)
    if plot:
        normal_util.plot_ranks(anomaly_ranks, ranks, "average_degree")
    print("spearman rank correlation with average degree is " + str(corr))
    print("p-test with average degree is " + str(p_test))
    print()
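# NOTE: `rank_outliers` is an external helper; only its signature is visible
# above. A rough sketch consistent with that usage (rank each time step by how
# far the statistic deviates from a trailing window, skipping an initial
# warm-up period) -- purely an assumption about the original implementation:
def rank_outliers(values, window=5, initial_period=10):
    import numpy as np
    scores = np.zeros(len(values))
    for i in range(initial_period, len(values)):
        past = values[max(0, i - window):i]
        mu, sd = np.mean(past), np.std(past)
        scores[i] = abs(values[i] - mu) / sd if sd > 0 else 0.0
    return scores.argsort().argsort()  # convert outlier scores to ranks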
def compute_summaries(G): """ Compute network features, computational times and their nature. Evaluate 54 summary statistics of a network G, plus 4 noise variables, store the computational time to evaluate each summary statistic, and keep track of their nature (discrete or not). Args: G (networkx.classes.graph.Graph): an undirected networkx graph. Returns: resDicts (tuple): a tuple containing the elements: - dictSums (dict): a dictionary with the name of the summaries as keys and the summary statistic values as values; - dictTimes (dict): a dictionary with the name of the summaries as keys and the time to compute each one as values; - dictIsDist (dict): a dictionary indicating if the summary is discrete (True) or not (False). """ dictSums = dict() # Will store the summary statistic values dictTimes = dict() # Will store the evaluation times dictIsDisc = dict() # Will store the summary statistic nature # Extract the largest connected component Gcc = sorted(nx.connected_components(G), key=len, reverse=True) G_lcc = G.subgraph(Gcc[0]) # Number of edges start = time.time() dictSums["num_edges"] = G.number_of_edges() dictTimes["num_edges"] = time.time() - start dictIsDisc["num_edges"] = True # Number of connected components start = time.time() dictSums["num_of_CC"] = nx.number_connected_components(G) dictTimes["num_of_CC"] = time.time() - start dictIsDisc["num_of_CC"] = True # Number of nodes in the largest connected component start = time.time() dictSums["num_nodes_LCC"] = nx.number_of_nodes(G_lcc) dictTimes["num_nodes_LCC"] = time.time() - start dictIsDisc["num_nodes_LCC"] = True # Number of edges in the largest connected component start = time.time() dictSums["num_edges_LCC"] = G_lcc.number_of_edges() dictTimes["num_edges_LCC"] = time.time() - start dictIsDisc["num_edges_LCC"] = True # Diameter of the largest connected component start = time.time() dictSums["diameter_LCC"] = nx.diameter(G_lcc) dictTimes["diameter_LCC"] = time.time() - start dictIsDisc["diameter_LCC"] = True # Average geodesic distance (shortest path length in the LCC) start = time.time() dictSums["avg_geodesic_dist_LCC"] = nx.average_shortest_path_length(G_lcc) dictTimes["avg_geodesic_dist_LCC"] = time.time() - start dictIsDisc["avg_geodesic_dist_LCC"] = False # Average degree of the neighborhood of each node start = time.time() dictSums["avg_deg_connectivity"] = np.mean( list(nx.average_degree_connectivity(G).values())) dictTimes["avg_deg_connectivity"] = time.time() - start dictIsDisc["avg_deg_connectivity"] = False # Average degree of the neighbors of each node in the LCC start = time.time() dictSums["avg_deg_connectivity_LCC"] = np.mean( list(nx.average_degree_connectivity(G_lcc).values())) dictTimes["avg_deg_connectivity_LCC"] = time.time() - start dictIsDisc["avg_deg_connectivity_LCC"] = False # Recover the degree distribution start_degree_extract = time.time() degree_vals = list(dict(G.degree()).values()) degree_extract_time = time.time() - start_degree_extract # Entropy of the degree distribution start = time.time() dictSums["degree_entropy"] = ss.entropy(degree_vals) dictTimes["degree_entropy"] = time.time() - start + degree_extract_time dictIsDisc["degree_entropy"] = False # Maximum degree start = time.time() dictSums["degree_max"] = max(degree_vals) dictTimes["degree_max"] = time.time() - start + degree_extract_time dictIsDisc["degree_max"] = True # Average degree start = time.time() dictSums["degree_mean"] = np.mean(degree_vals) dictTimes["degree_mean"] = time.time() - start + degree_extract_time 
dictIsDisc["degree_mean"] = False # Median degree start = time.time() dictSums["degree_median"] = np.median(degree_vals) dictTimes["degree_median"] = time.time() - start + degree_extract_time dictIsDisc["degree_median"] = False # Standard deviation of the degree distribution start = time.time() dictSums["degree_std"] = np.std(degree_vals) dictTimes["degree_std"] = time.time() - start + degree_extract_time dictIsDisc["degree_std"] = False # Quantile 25% start = time.time() dictSums["degree_q025"] = np.quantile(degree_vals, 0.25) dictTimes["degree_q025"] = time.time() - start + degree_extract_time dictIsDisc["degree_q025"] = False # Quantile 75% start = time.time() dictSums["degree_q075"] = np.quantile(degree_vals, 0.75) dictTimes["degree_q075"] = time.time() - start + degree_extract_time dictIsDisc["degree_q075"] = False # Average geodesic distance start = time.time() dictSums["avg_shortest_path_length_LCC"] = nx.average_shortest_path_length( G_lcc) dictTimes["avg_shortest_path_length_LCC"] = time.time() - start dictIsDisc["avg_shortest_path_length_LCC"] = False # Average global efficiency: # The efficiency of a pair of nodes in a graph is the multiplicative # inverse of the shortest path distance between the nodes. # The average global efficiency of a graph is the average efficiency of # all pairs of nodes. start = time.time() dictSums["avg_global_efficiency"] = nx.global_efficiency(G) dictTimes["avg_global_efficiency"] = time.time() - start dictIsDisc["avg_global_efficiency"] = False # Harmonic mean which is 1/avg_global_efficiency start = time.time() dictSums["harmonic_mean"] = nx.global_efficiency(G) dictTimes["harmonic_mean"] = time.time() - start dictIsDisc["harmonic_mean"] = False # Average local efficiency # The local efficiency of a node in the graph is the average global # efficiency of the subgraph induced by the neighbors of the node. # The average local efficiency is the average of the # local efficiencies of each node. start = time.time() dictSums["avg_local_efficiency_LCC"] = nx.local_efficiency(G_lcc) dictTimes["avg_local_efficiency_LCC"] = time.time() - start dictIsDisc["avg_local_efficiency_LCC"] = False # Node connectivity # The node connectivity is equal to the minimum number of nodes that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. start = time.time() dictSums["node_connectivity_LCC"] = nx.node_connectivity(G_lcc) dictTimes["node_connectivity_LCC"] = time.time() - start dictIsDisc["node_connectivity_LCC"] = True # Edge connectivity # The edge connectivity is equal to the minimum number of edges that # must be removed to disconnect G or render it trivial. # Only on the largest connected component here. 
start = time.time() dictSums["edge_connectivity_LCC"] = nx.edge_connectivity(G_lcc) dictTimes["edge_connectivity_LCC"] = time.time() - start dictIsDisc["edge_connectivity_LCC"] = True # Graph transitivity # 3*times the number of triangles divided by the number of triades start = time.time() dictSums["transitivity"] = nx.transitivity(G) dictTimes["transitivity"] = time.time() - start dictIsDisc["transitivity"] = False # Number of triangles start = time.time() dictSums["num_triangles"] = np.sum(list(nx.triangles(G).values())) / 3 dictTimes["num_triangles"] = time.time() - start dictIsDisc["num_triangles"] = True # Estimate of the average clustering coefficient of G: # Average local clustering coefficient, with local clustering coefficient # defined as C_i = (nbr of pairs of neighbors of i that are connected)/(nbr of pairs of neighbors of i) start = time.time() dictSums["avg_clustering_coef"] = nx.average_clustering(G) dictTimes["avg_clustering_coef"] = time.time() - start dictIsDisc["avg_clustering_coef"] = False # Square clustering (averaged over nodes): # the fraction of possible squares that exist at the node. # We average it over nodes start = time.time() dictSums["square_clustering_mean"] = np.mean( list(nx.square_clustering(G).values())) dictTimes["square_clustering_mean"] = time.time() - start dictIsDisc["square_clustering_mean"] = False # We compute the median start = time.time() dictSums["square_clustering_median"] = np.median( list(nx.square_clustering(G).values())) dictTimes["square_clustering_median"] = time.time() - start dictIsDisc["square_clustering_median"] = False # We compute the standard deviation start = time.time() dictSums["square_clustering_std"] = np.std( list(nx.square_clustering(G).values())) dictTimes["square_clustering_std"] = time.time() - start dictIsDisc["square_clustering_std"] = False # Number of 2-cores start = time.time() dictSums["num_2cores"] = len(nx.k_core(G, k=2)) dictTimes["num_2cores"] = time.time() - start dictIsDisc["num_2cores"] = True # Number of 3-cores start = time.time() dictSums["num_3cores"] = len(nx.k_core(G, k=3)) dictTimes["num_3cores"] = time.time() - start dictIsDisc["num_3cores"] = True # Number of 4-cores start = time.time() dictSums["num_4cores"] = len(nx.k_core(G, k=4)) dictTimes["num_4cores"] = time.time() - start dictIsDisc["num_4cores"] = True # Number of 5-cores start = time.time() dictSums["num_5cores"] = len(nx.k_core(G, k=5)) dictTimes["num_5cores"] = time.time() - start dictIsDisc["num_5cores"] = True # Number of 6-cores start = time.time() dictSums["num_6cores"] = len(nx.k_core(G, k=6)) dictTimes["num_6cores"] = time.time() - start dictIsDisc["num_6cores"] = True # Number of k-shells # The k-shell is the subgraph induced by nodes with core number k. 
# That is, nodes in the k-core that are not in the k+1-core # Number of 2-shells start = time.time() dictSums["num_2shells"] = len(nx.k_shell(G, 2)) dictTimes["num_2shells"] = time.time() - start dictIsDisc["num_2shells"] = True # Number of 3-shells start = time.time() dictSums["num_3shells"] = len(nx.k_shell(G, 3)) dictTimes["num_3shells"] = time.time() - start dictIsDisc["num_3shells"] = True # Number of 4-shells start = time.time() dictSums["num_4shells"] = len(nx.k_shell(G, 4)) dictTimes["num_4shells"] = time.time() - start dictIsDisc["num_4shells"] = True # Number of 5-shells start = time.time() dictSums["num_5shells"] = len(nx.k_shell(G, 5)) dictTimes["num_5shells"] = time.time() - start dictIsDisc["num_5shells"] = True # Number of 6-shells start = time.time() dictSums["num_6shells"] = len(nx.k_shell(G, 6)) dictTimes["num_6shells"] = time.time() - start dictIsDisc["num_6shells"] = True start = time.time() listOfCliques = list(nx.enumerate_all_cliques(G)) enum_all_cliques_time = time.time() - start # Number of 4-cliques start = time.time() n4Clique = 0 for li in listOfCliques: if len(li) == 4: n4Clique += 1 dictSums["num_4cliques"] = n4Clique dictTimes["num_4cliques"] = time.time() - start + enum_all_cliques_time dictIsDisc["num_4cliques"] = True # Number of 5-cliques start = time.time() n5Clique = 0 for li in listOfCliques: if len(li) == 5: n5Clique += 1 dictSums["num_5cliques"] = n5Clique dictTimes["num_5cliques"] = time.time() - start + enum_all_cliques_time dictIsDisc["num_5cliques"] = True # Maximal size of a clique in the graph start = time.time() dictSums["max_clique_size"] = len(approximation.clique.max_clique(G)) dictTimes["max_clique_size"] = time.time() - start dictIsDisc["max_clique_size"] = True # Approximated size of a large clique in the graph start = time.time() dictSums["large_clique_size"] = approximation.large_clique_size(G) dictTimes["large_clique_size"] = time.time() - start dictIsDisc["large_clique_size"] = True # Number of shortest path of size k start = time.time() listOfPLength = list(nx.shortest_path_length(G)) path_length_time = time.time() - start # when k = 3 start = time.time() n3Paths = 0 for node in G.nodes(): tmp = list(listOfPLength[node][1].values()) n3Paths += tmp.count(3) dictSums["num_shortest_3paths"] = n3Paths / 2 dictTimes["num_shortest_3paths"] = time.time() - start + path_length_time dictIsDisc["num_shortest_3paths"] = True # when k = 4 start = time.time() n4Paths = 0 for node in G.nodes(): tmp = list(listOfPLength[node][1].values()) n4Paths += tmp.count(4) dictSums["num_shortest_4paths"] = n4Paths / 2 dictTimes["num_shortest_4paths"] = time.time() - start + path_length_time dictIsDisc["num_shortest_4paths"] = True # when k = 5 start = time.time() n5Paths = 0 for node in G.nodes(): tmp = list(listOfPLength[node][1].values()) n5Paths += tmp.count(5) dictSums["num_shortest_5paths"] = n5Paths / 2 dictTimes["num_shortest_5paths"] = time.time() - start + path_length_time dictIsDisc["num_shortest_5paths"] = True # when k = 6 start = time.time() n6Paths = 0 for node in G.nodes(): tmp = list(listOfPLength[node][1].values()) n6Paths += tmp.count(6) dictSums["num_shortest_6paths"] = n6Paths / 2 dictTimes["num_shortest_6paths"] = time.time() - start + path_length_time dictIsDisc["num_shortest_6paths"] = True # Size of the minimum (weight) node dominating set: # A subset of nodes where each node not in the subset has for direct # neighbor a node of the dominating set. 
start = time.time()
T = approximation.min_weighted_dominating_set(G)
dictSums["size_min_node_dom_set"] = len(T)
dictTimes["size_min_node_dom_set"] = time.time() - start
dictIsDisc["size_min_node_dom_set"] = True

# Same, but for the edge dominating set
start = time.time()
T = approximation.min_edge_dominating_set(G)
dictSums["size_min_edge_dom_set"] = 2 * len(T)  # times 2 to get a number of nodes
dictTimes["size_min_edge_dom_set"] = time.time() - start
dictIsDisc["size_min_edge_dom_set"] = True

# The Wiener index of a graph is the sum of the shortest-path distances
# between each pair of reachable nodes. For pairs of nodes in undirected
# graphs, only one orientation of the pair is counted.
# (Computed on the LCC, otherwise it is infinite.)
start = time.time()
dictSums["wiener_index_LCC"] = nx.wiener_index(G_lcc)
dictTimes["wiener_index_LCC"] = time.time() - start
dictIsDisc["wiener_index_LCC"] = True

# Betweenness node centrality (averaged over nodes):
# at node u it is defined as B_u = sum_{i,j} sigma(i, u, j) / sigma(i, j),
# where sigma(i, j) is the number of shortest paths between i and j, and
# sigma(i, u, j) is the number of those paths that pass through u
start = time.time()
betweenness = list(nx.betweenness_centrality(G).values())
time_betweenness = time.time() - start

# Mean across nodes
start = time.time()
dictSums["betweenness_centrality_mean"] = np.mean(betweenness)
dictTimes["betweenness_centrality_mean"] = time.time() - start + time_betweenness
dictIsDisc["betweenness_centrality_mean"] = False

# Maximum across nodes
start = time.time()
dictSums["betweenness_centrality_max"] = max(betweenness)
dictTimes["betweenness_centrality_max"] = time.time() - start + time_betweenness
dictIsDisc["betweenness_centrality_max"] = False

# Central point dominance:
# CPD = sum_u (B_max - B_u) / (N - 1)
start = time.time()
dictSums["central_point_dominance"] = sum(
    max(betweenness) - np.array(betweenness)) / (len(betweenness) - 1)
dictTimes["central_point_dominance"] = time.time() - start + time_betweenness
dictIsDisc["central_point_dominance"] = False

# Estrada index: sum_{i=1}^{n} exp(lambda_i),
# with n the number of nodes and lambda_i the i-th eigenvalue of the
# adjacency matrix of G (the dictionary key keeps the original spelling)
start = time.time()
dictSums["Estrata_index"] = nx.estrada_index(G)
dictTimes["Estrata_index"] = time.time() - start
dictIsDisc["Estrata_index"] = False

# Eigenvector centrality:
# the centrality of node i is the i-th coordinate of the vector x such that
# Ax = lambda * x (for the largest eigenvalue), so each node's score is
# proportional to the sum of its neighbors' scores
# Mean across nodes
start = time.time()
dictSums["avg_eigenvec_centrality"] = np.mean(
    list(nx.eigenvector_centrality_numpy(G).values()))
dictTimes["avg_eigenvec_centrality"] = time.time() - start
dictIsDisc["avg_eigenvec_centrality"] = False

# Maximum across nodes
start = time.time()
dictSums["max_eigenvec_centrality"] = max(
    list(nx.eigenvector_centrality_numpy(G).values()))
dictTimes["max_eigenvec_centrality"] = time.time() - start
dictIsDisc["max_eigenvec_centrality"] = False

### Noise generation ###
# Noise simulated from a Normal(0, 1) distribution
start = time.time()
dictSums["noise_Gauss"] = ss.norm.rvs(0, 1)
dictTimes["noise_Gauss"] = time.time() - start
dictIsDisc["noise_Gauss"] = False

# Noise simulated from a Uniform distribution on [0, 50]
start = time.time()
dictSums["noise_Unif"] = ss.uniform.rvs(0, 50)
dictTimes["noise_Unif"] = time.time() - start
dictIsDisc["noise_Unif"] = False

# Noise simulated from a Bernoulli(0.5) distribution
start = time.time()
dictSums["noise_Bern"] = ss.bernoulli.rvs(0.5)
dictTimes["noise_Bern"] = time.time() - start
dictIsDisc["noise_Bern"] = True # Noise simulated from a discrete uniform distribution [0,50[ start = time.time() dictSums["noise_disc_Unif"] = ss.randint.rvs(0, 50) dictTimes["noise_disc_Unif"] = time.time() - start dictIsDisc["noise_disc_Unif"] = True resDicts = (dictSums, dictTimes, dictIsDisc) return resDicts
# -*- coding: utf-8 -*-
import networkx as nx
import community
import matplotlib.pyplot as plt

#path = "/PycharmProjects/untitled/polbooks.gml"
Graph = nx.read_gml("dolphins.gml")
print Graph.graph
# print node information
print Graph.nodes(data=True)
# print edge information
print Graph.edges()
# compute the transitivity of the graph/network
print nx.transitivity(Graph)
# number of nodes
print Graph.number_of_nodes()
# number of edges
print Graph.number_of_edges()
# number of neighbors of a node
#print Graph.neighbors(1)

# graph partition
part = community.best_partition(Graph)
print part
# compute the modularity
mod = community.modularity(part, Graph)
print mod

# drawing
values = [part.get(node) for node in Graph.nodes()]
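The snippet stops right after building the per-node community values; a hedged sketch of the usual next step, coloring nodes by community (the spring layout and styling are assumptions, not part of the original):

pos = nx.spring_layout(Graph)
# color each node by its community id from best_partition
nx.draw_networkx_nodes(Graph, pos, node_color=values,
                       cmap=plt.get_cmap('jet'), node_size=40)
nx.draw_networkx_edges(Graph, pos, alpha=0.3)
plt.axis('off')
plt.show()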
f.write('\nEdges: ' + str(G.number_of_edges()))
print('Radius:')
f.write('\nRadius of each connected component:\n')
components = list(nx.connected_component_subgraphs(G))
for comp in sorted(components, key=lambda c: c.size(), reverse=True):
    print(nx.radius(comp))
    f.write(str(nx.radius(comp)) + '\n')
print('Central nodes:')
f.write('Central nodes:\n')
deg = nx.degree_centrality(G)
for nodeid in sorted(deg, key=deg.get, reverse=True):
    if nodeid < len(labels):  # was <=, which could index one past the end of labels
        print(nodeid, labels[nodeid])
        f.write(str(nodeid) + ' ' + labels[nodeid])
        f.write('\n')
print('Clustering coefficient:')
print(nx.average_clustering(G))
f.write('Clustering coefficient:\n' + str(nx.average_clustering(G)))
print(nx.transitivity(G))
f.write('\n' + str(nx.transitivity(G)))
f.close()

import matplotlib.pyplot as plt

pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, pos, node_color='red', node_size=10)
nx.draw_networkx_edges(G, pos, edge_color='blue')
nx.draw_networkx_labels(G, pos, font_size=14, font_family='Arial')
plt.axis('off')
plt.show()
def processa(nomeEntrada, toStdOut=False):
    """Processes the network file, placing the outputs in the folder 'out/<rede>'

    Outputs:
    - ?
    """
    pastaSaída = criaPastaSaída(nomeEntrada)
    with open(pastaSaída + '/stats.txt', 'w') as arq:

        def printa(*args):
            """Writes the output to the statistics file, pliz"""
            if toStdOut:
                print(' ', *args)
            else:
                arq.write(' '.join(map(str, args)))
                arq.write('\n')

        # read the network from the input file
        G = nx.read_weighted_edgelist(nomeEntrada, nodetype=int, comments='%')
        # and find its largest component
        maiorComponente = max(nx.connected_component_subgraphs(G), key=len)

        distribuiçãoDeGraus = list(map(lambda t: t[1], G.degree()))
        histograma = nx.degree_histogram(G)
        n_nós = len(G)
        probabilidadeGraus = list(map(lambda x: x / n_nós, histograma))
        grausAcumulados = np.flipud(np.cumsum(np.flipud(probabilidadeGraus)))

        # power-law fit of the degree distribution
        leiPotência = powerlaw.Fit(distribuiçãoDeGraus, fit_method='KS')
        printa('Power law - Alpha:', leiPotência.power_law.alpha,
               '\txmin:', leiPotência.power_law.xmin)

        # global measures
        printa('Average degree:', sum(distribuiçãoDeGraus) / n_nós)
        printa(
            'Second moment of the degree distribution:',
            reduce(lambda acc, ki: acc + ki**2, distribuiçãoDeGraus, 0) / n_nós)
        # initializer 0 and a prob > 0 guard added: reduce without an initializer
        # skipped the p*log(p) transform of the first term, and log(0) is undefined
        printa(
            'Shannon entropy:',
            -reduce(lambda acc, prob: acc + (prob * math.log(prob) if prob > 0 else 0),
                    grausAcumulados, 0))
        printa('Average local clustering coefficient:',
               nx.average_clustering(G))
        printa('Clustering coefficient via the transitivity formula:',
               nx.transitivity(G))
        printa('Average shortest path length:',
               nx.average_shortest_path_length(maiorComponente))
        printa('Efficiency:', eficiência(G))
        printa('Diameter:', nx.diameter(maiorComponente))

        # clustering analysis
        distribuiçãoAglomeração = list(nx.clustering(G).values())
        pirso = np.array(
            list(
                filter(lambda x: x[0] != 0 and x[1] != 0,
                       zip(distribuiçãoDeGraus, distribuiçãoAglomeração))))
        printa('Pearson correlation of k(i) X cc(i):',
               stats.pearsonr(np.log10(pirso[:, 0]), np.log10(pirso[:, 1]))[0])

        ## Plots ##
        # degree distribution plot
        plt.figure('Degree distribution')
        plt.clf()
        plt.plot(probabilidadeGraus, 'r-', label='Probability')
        plt.plot(grausAcumulados, 'b-', label='Complementary cumulative prob.')
        plt.title('Degree distribution')
        plt.yscale('log')
        plt.xscale('log')
        plt.legend(loc='lower left')
        plt.savefig(pastaSaída + '/dist-grau.png')

        # k(i) vs cc(i) plot
        plt.figure('k(i) X cc(i)')
        plt.clf()
        plt.plot(distribuiçãoDeGraus, distribuiçãoAglomeração, 'bo')
        plt.title('Degree X clustering coefficient')
        plt.xlabel('k(i)')
        plt.ylabel('cc(i)')
        plt.yscale('log')
        plt.xscale('log')
        plt.savefig(pastaSaída + '/kXcc.png')

        # cumulative clustering coefficient plot
        plt.figure('Clustering coefficient')
        plt.clf()
        # density=True replaces the long-deprecated normed=True
        plt.hist(distribuiçãoAglomeração, bins=100, histtype='step',
                 density=True, cumulative=True)
        plt.xlabel('cc')
        plt.ylabel('P (X < x)')
        plt.title(
            'Cumulative probability distribution of the local clustering coefficient'
        )
        plt.savefig(pastaSaída + '/aglomeração.png')
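A minimal invocation sketch. The filename is a placeholder, and criaPastaSaída, eficiência, plus the numpy/powerlaw/scipy/matplotlib imports are assumed to live elsewhere in the original module:

# 'rede.edgelist' is a hypothetical weighted edge list with integer node ids
processa('rede.edgelist', toStdOut=True)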
def get_clustering_values(gcc, ego) -> tuple:
    clustering_coefficient = nx.clustering(gcc, ego)
    avg_clustering_coefficient = nx.average_clustering(gcc)
    transitivity = nx.transitivity(gcc)
    return clustering_coefficient, avg_clustering_coefficient, transitivity
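A usage sketch, assuming gcc is the giant connected component of some graph and ego is a node of interest; the graph chosen here is illustrative:

import networkx as nx

G = nx.karate_club_graph()
# take the largest connected component as the "gcc" argument
gcc = G.subgraph(max(nx.connected_components(G), key=len))
cc, avg_cc, trans = get_clustering_values(gcc, ego=0)
print(cc, avg_cc, trans)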
def get_small_worldness(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_small_worldness.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G))

        cluster = nx.average_clustering(G)
        ER_cluster = nx.average_clustering(ER_graph)
        transi = nx.transitivity(G)
        ER_transi = nx.transitivity(ER_graph)

        print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' % (
            threshold, cluster, ER_cluster, transi, ER_transi)
        f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster))

        # list() so the components can be iterated safely; the inner loop
        # variables are renamed so they no longer shadow the threshold index i
        components = list(nx.connected_component_subgraphs(G))
        ER_components = list(nx.connected_component_subgraphs(ER_graph))

        values = []
        ER_values = []
        for comp in components:
            if nx.number_of_nodes(comp) > 1:
                values.append(nx.average_shortest_path_length(comp))
        for comp in ER_components:
            if nx.number_of_nodes(comp) > 1:
                ER_values.append(nx.average_shortest_path_length(comp))

        if len(values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(values) / len(values)))
        if len(ER_values) == 0:
            f.write("\t0.")
        else:
            f.write("\t%f" % (sum(ER_values) / len(ER_values)))

        f.write("\t%f\t%f" % (transi, ER_transi))

        # Small-worldness S = (C/C_rand) / (L/L_rand), with the clustering term
        # taken either as the average clustering coefficient (Watts-Strogatz)
        # or as the transitivity
        if (ER_cluster * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_WS = (cluster / ER_cluster) / ((sum(values) / len(values)) /
                                             (sum(ER_values) / len(ER_values)))
        else:
            S_WS = 0.
        if (ER_transi * sum(values) * len(values) * sum(ER_values) * len(ER_values)) > 0:
            S_Delta = (transi / ER_transi) / ((sum(values) / len(values)) /
                                              (sum(ER_values) / len(ER_values)))
        else:
            S_Delta = 0.

        f.write("\t%f\t%f" % (S_WS, S_Delta))
        f.write("\n")
    f.close()
    print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
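A hedged invocation sketch; get_threshold_matrix must be defined elsewhere in the original module, and the filename is a placeholder (the function strips the last four characters of the name to build the .dat output path):

# would write 'corr_matrix_small_worldness.dat' for this hypothetical input
get_small_worldness('corr_matrix.txt')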
def answer_fourteen():
    G = answer_thirteen()
    return nx.transitivity(G), nx.average_clustering(G)
G.add_edge("B", "D") # report print(nx.info(G)) # reports basic info about network print("########### nodes #############") # print(G.number_of_nodes()) print(G.nodes()) print("########### edges #############") # print(G.number_of_edges()) print(G.edges()) # report adjacencies print("########### adjacenies ########") for node in G.nodes: print( node + ": " + str(list(G.adj[node])) ) # it's good to use list function before printing # list function turns native nx dictionary into more human-readable list print(list(G["A"])) # list form # print(G["jus"]) # dictionary form # density density = nx.density(G) print("########### density ###########") print("Network density:", density) # triads print("########### triads ############") triadic_closure = nx.transitivity(G) print("Triadic closure:", triadic_closure) # I don't know how transitivity is calculated here.
def omega(G, niter=100, nrand=10, seed=None):
    """Returns the small-world coefficient (omega) of a graph.

    The small-world coefficient of a graph G is:

    omega = Lr/L - C/Cl

    where C and L are respectively the average clustering coefficient and
    average shortest path length of G. Lr is the average shortest path length
    of an equivalent random graph and Cl is the average clustering coefficient
    of an equivalent lattice graph.

    The small-world coefficient (omega) ranges between -1 and 1. Values close
    to 0 mean that G features small-world characteristics. Values close to -1
    mean G has a lattice shape, whereas values close to 1 mean G is a random
    graph.

    Parameters
    ----------
    G : NetworkX graph
        An undirected graph.

    niter : integer (optional, default=100)
        Approximate number of rewirings per edge to compute the equivalent
        random graph.

    nrand : integer (optional, default=10)
        Number of random graphs generated to compute the average clustering
        coefficient (Cl) and average shortest path length (Lr).

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    omega : float
        The small-world coefficient (omega)

    Notes
    -----
    The implementation is adapted from the algorithm by Telesford et al. [1]_.

    References
    ----------
    .. [1] Telesford, Joyce, Hayasaka, Burdette, and Laurienti (2011).
       "The Ubiquity of Small-World Networks".
       Brain Connectivity. 1 (0038): 367-75. PMC 3604768. PMID 22432451.
       doi:10.1089/brain.2011.0038.
    """
    import numpy as np

    # Compute the mean clustering coefficient and average shortest path length
    # for an equivalent random graph
    randMetrics = {"C": [], "L": []}
    for i in range(nrand):
        Gr = random_reference(G, niter=niter, seed=seed)
        Gl = lattice_reference(G, niter=niter, seed=seed)
        randMetrics["C"].append(nx.transitivity(Gl))
        randMetrics["L"].append(nx.average_shortest_path_length(Gr))

    C = nx.transitivity(G)
    L = nx.average_shortest_path_length(G)
    Cl = np.mean(randMetrics["C"])
    Lr = np.mean(randMetrics["L"])

    omega = (Lr / L) - (C / Cl)

    return omega
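A quick usage sketch on a Watts-Strogatz graph, which should give omega near 0. random_reference and lattice_reference are assumed to be in scope (in networkx they live alongside this function in the smallworld module), and the small niter/nrand values trade accuracy for speed:

import networkx as nx

G = nx.watts_strogatz_graph(60, k=6, p=0.1, seed=1)
print(omega(G, niter=5, nrand=2, seed=1))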
import networkx as nx  # added: used below but missing from the original imports
from scipy import stats
from operator import mul  # or mul = lambda x, y: x * y
from fractions import Fraction
from functools import reduce  # needed on Python 3, where reduce is no longer a builtin
import sys


# Calculates the binomial coefficient (n over k)
def nCk(n, k):
    return int(reduce(mul, (Fraction(n - i, i + 1) for i in range(k)), 1))


# Read the network as an edge list, unweighted and undirected
net = nx.read_edgelist(sys.argv[1], nodetype=int)

# calculate the transitivity of the network
C = nx.transitivity(net)

# Make a dictionary nodeID:degree
d = dict(nx.degree(net))

# The branching is calculated as P2/P1,
# the intermodular connectivity as P3/P2
suma1 = 0
P2 = 0
for key in d:
    suma1 += int(d[key])
    P2 += nCk(int(d[key]), 2)
P1 = suma1 * 0.5
C3 = C * P2 / 3.0

suma = 0
for u, v in net.edges():
    suma = suma + (d[u] - 1) * (d[v] - 1)
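The Fraction-based binomial can be sanity-checked against the standard library; math.comb requires Python 3.8+:

import math

# nCk should agree with math.comb for all small arguments
assert all(nCk(n, k) == math.comb(n, k)
           for n in range(10) for k in range(n + 1))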
def test_path(self):
    G = nx.path_graph(10)
    assert nx.transitivity(G) == 0.0
def transitivity_score(A, beta):
    # note: beta is accepted but not used by this score
    G = nx.from_numpy_matrix(A)  # from_numpy_array in networkx >= 3.0
    return nx.transitivity(G)
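A usage sketch: build an adjacency matrix from a known graph and score it, passing a dummy beta since the function ignores it:

import networkx as nx

A = nx.to_numpy_array(nx.karate_club_graph())
print(transitivity_score(A, beta=None))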
def test_k5(self):
    G = nx.complete_graph(5)
    assert nx.transitivity(G) == 1.0
    G.remove_edge(1, 2)
    assert nx.transitivity(G) == 0.875
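The expected value in the second assertion can be derived by hand; a short check of the arithmetic using only the stdlib:

from math import comb

# removing edge (1, 2) kills the 3 triangles through both endpoints,
# leaving 7 of the original C(5, 3) = 10
triangles = comb(5, 3) - 3
# three nodes keep degree 4, the two endpoints drop to degree 3
triads = 3 * comb(4, 2) + 2 * comb(3, 2)  # = 18 + 6 = 24
assert 3 * triangles / triads == 0.875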
import networkx as nx
import operator


def createGraph(selfGraph):
    """
    Create a graph (a networkx network entity) from selfGraph (a dictionary),
    then look at its attributes from a network-theory point of view.
    """
    # (0) Create an empty network
    G = nx.Graph()

    # (1) Add nodes
    G.add_nodes_from(list(selfGraph.keys()))

    # (2) Add edges, with the weight specified directly.
    # First, build the edge set:
    edgesList = []
    for node1 in selfGraph.keys():
        for node2 in selfGraph[node1][1]:
            weight12 = selfGraph[node1][1][node2]  # weight of the edge: relatedness of the 2 nodes
            if not [node2, node1, weight12] in edgesList:  # edges are symmetric
                edgesList.append([node1, node2, weight12])
    G.add_weighted_edges_from(edgesList)
    # G.add_edges_from()
    # G.add_edge(2, 3, weight=0.9)

    # (3) Add node attributes. Attribute data must be a dictionary with node
    # names as keys and attribute values as values.
    # NB: attributes can have different types: nx.set_node_attributes(G, att_dic, 'name_att')
    weightNode = {w: selfGraph[w][0] for w in selfGraph.keys()}
    nx.set_node_attributes(G, weightNode, 'relevancy')
    # To access them: G.nodes[node]['relevancy']

    # (4) Add edge attributes. Not needed for now, as the weight is already on
    # the edges, but other attributes could be added here.
    # nx.set_edge_attributes(G, weightEdge, 'relatedness')

    # (5) Look at properties of the self graph
    descriptionSelf = nx.info(G) + "\n"
    descriptionSelf += 'Density of Self: {}'.format(nx.density(G)) + "\n"
    descriptionSelf += 'Am I connected? ' + str(nx.is_connected(G)) + "\n"
    components = nx.connected_components(G)
    descriptionSelf += 'I have {} connected components'.format(nx.number_connected_components(G)) + "\n"
    largest_component = max(components, key=len)
    subSelf = G.subgraph(largest_component)  # a "subgraph" of just the largest component
    diameter = nx.diameter(subSelf)
    descriptionSelf += 'The diameter of my largest connected component is: ' + str(diameter) + "\n"

    # Transitivity, like density, expresses how interconnected a graph is in
    # terms of a ratio of actual over possible connections: it is the ratio of
    # all triangles over all possible triangles.
    descriptionSelf += "My transitivity coefficient is " + str(nx.transitivity(G)) + "\n"

    # Node centrality: find which nodes are the most important in the network.
    degree_dict = dict(G.degree(G.nodes()))  # the degree of a node is its number of edges
    nx.set_node_attributes(G, degree_dict, 'degree')  # first add each node's degree as an extra attribute
    sorted_degree = sorted(degree_dict.items(), key=operator.itemgetter(1), reverse=True)  # sort by degree
    # print(sorted_degree[:3])
    descriptionSelf += "The three bigger hubs in me are: " + ', '.join(elt[0] for elt in sorted_degree[:3]) + "\n"

    # Other centralities besides hubs:
    # Eigenvector centrality is a kind of extension of degree: it looks at a
    # combination of a node's edges and the edges of that node's neighbors.
    # It cares whether you are a hub, but also how many hubs you are connected
    # to, a second-order notion of connectivity.
    # Betweenness centrality looks at all the shortest paths that pass through
    # a particular node (see above).
    betweenness_dict = nx.betweenness_centrality(G)
    # eigenvector_dict = nx.eigenvector_centrality(G)  # too computationally heavy for VB?
    nx.set_node_attributes(G, betweenness_dict, 'betweenness')  # assign each to an attribute in the network
    # nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')
    sorted_betweenness = sorted(betweenness_dict.items(), key=operator.itemgetter(1), reverse=True)
    descriptionSelf += "Three most central concepts in me are: " + ', '.join(elt[0] for elt in sorted_betweenness[:3]) + "\n"

    # Could add other properties: community detection within Self with
    # modularity, different clusters, a clustered Self, etc.

    print(descriptionSelf)
    return G, descriptionSelf
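A usage sketch showing the dictionary shape the function expects, inferred from the indexing above: each node name maps to a pair (relevancy weight, {neighbor: relatedness weight}). The example content is hypothetical:

selfGraph = {
    'dream':  (0.9, {'sleep': 0.8, 'memory': 0.5}),
    'sleep':  (0.7, {'dream': 0.8}),
    'memory': (0.6, {'dream': 0.5}),
}
G, description = createGraph(selfGraph)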