def fast_graph_could_be_isomorphic(G1, G2):
    """Returns False if graphs G1 and G2 are definitely not isomorphic.

    True does NOT guarantee isomorphism.

    Checks for matching degree and triangle sequences.
    """
    # Check global properties
    if G1.order() != G2.order():
        return False

    # Check local properties
    d1 = G1.degree(with_labels=True)
    t1 = networkx.triangles(G1, with_labels=True)
    props1 = [[d1[v], t1[v]] for v in d1]
    props1.sort()

    d2 = G2.degree(with_labels=True)
    t2 = networkx.triangles(G2, with_labels=True)
    props2 = [[d2[v], t2[v]] for v in d2]
    props2.sort()

    if props1 != props2:
        return False

    # OK...
    return True
def test_path(self): G = nx.path_graph(10) assert_equal(list(nx.triangles(G).values()), [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) assert_equal(nx.triangles(G), {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0})
def node_wcc(x, S, V): tS, tV = nx.triangles(S, x), nx.triangles(V, x) vtS, vtV = number_of_triangle_nodes(S, x), number_of_triangle_nodes(V, x) vtV_S = vtV - vtS result = 0.0 if tV == 0 else tS / tV * vtV / (S.number_of_nodes() - 1 + vtV_S) print("node: {}, tS: {}, tV: {}, vtS: {}, vtV: {}, vtV_S: {} wcc: {}".format(x, tS, tV, vtS, vtV, vtV_S, result)) return result
def test_holme(): for N in (5, 10, 15): for m in (1, 2, 3, 4): m0 = max(3, m+1) # must reproduce logic of the model. g = HolmeGraph.get(N=N, m=m, Pt=1, m0=m0) assert_equal(g.number_of_edges(), (N-m0)*m) for N in (5, 10, 15): for m in (1, 2, 3, 4): m0 = max(3, m+1) # must reproduce logic of the model. g = HolmeGraph.get(N=N, m=m, Pt=.5, m0=m0) assert_equal(g.number_of_edges(), (N-m0)*m) # Should return itself g0 = networkx.complete_graph(5) g = HolmeGraph.get(N=5, m=3, Pt=1, g0=g0) assert_equal(networkx.triangles(g), dict((i,4*3/2) for i in range(5))) # Test number of triangles created for new nodes. g0 = networkx.complete_graph(5) g = HolmeGraph.get(N=6, m=3, Pt=1, g0=g0) assert_equal(networkx.triangles(g)[5], 3) g0 = networkx.complete_graph(6) g = HolmeGraph.get(N=7, m=3, Pt=1, g0=g0) assert_equal(networkx.triangles(g)[6], 3) # Test number of triangles created for new nodes. def _make(): g = HolmeGraph.get(N=6, m=3, m0=5, Pt=0) return networkx.triangles(g) sizes = [_make() for _ in range(10)] assert_true(any(_[5]==0 for _ in sizes))
def fast_could_be_isomorphic(G1, G2): """Returns False if graphs are definitely not isomorphic. True does NOT guarantee isomorphism. Parameters ---------- G1, G2 : graphs The two graphs G1 and G2 must be the same type. Notes ----- Checks for matching degree and triangle sequences. """ # Check global properties if G1.order() != G2.order(): return False # Check local properties d1 = G1.degree() t1 = nx.triangles(G1) props1 = [[d1[v], t1[v]] for v in d1] props1.sort() d2 = G2.degree() t2 = nx.triangles(G2) props2 = [[d2[v], t2[v]] for v in d2] props2.sort() if props1 != props2: return False # OK... return True
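This appears to be the NetworkX implementation itself, exposed as nx.fast_could_be_isomorphic. A minimal usage sketch (the example graphs are arbitrary, not from the original source): a 4-node path and a 4-node star have equal order but different degree sequences, so the heuristic rejects them outright, while identical cycles trivially pass.

import networkx as nx

# Different degree sequences ([1, 1, 2, 2] vs. [1, 1, 1, 3]) -> definitely not isomorphic.
print(nx.fast_could_be_isomorphic(nx.path_graph(4), nx.star_graph(3)))    # False

# Matching degree and triangle sequences -> may be isomorphic (here they actually are).
print(nx.fast_could_be_isomorphic(nx.cycle_graph(5), nx.cycle_graph(5)))  # True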
def test_cubical(self): G = nx.cubical_graph() assert_equal(list(nx.triangles(G).values()), [0, 0, 0, 0, 0, 0, 0, 0]) assert_equal(nx.triangles(G,1),0) assert_equal(list(nx.triangles(G,[1,2]).values()),[0, 0]) assert_equal(nx.triangles(G,1),0) assert_equal(nx.triangles(G,[1,2]),{1: 0, 2: 0})
def test_k5(self): G = nx.complete_graph(5) assert_equal(list(nx.triangles(G).values()),[6, 6, 6, 6, 6]) assert_equal(sum(nx.triangles(G).values())/3.0,10) assert_equal(nx.triangles(G,1),6) G.remove_edge(1,2) assert_equal(list(nx.triangles(G).values()),[5, 3, 3, 5, 5]) assert_equal(nx.triangles(G,1),3)
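An illustrative arithmetic check (added here, not part of the original test file): in K5 each vertex has 4 neighbours, any two of which close a triangle with it, giving comb(4, 2) = 6 triangles per vertex and 5 * 6 / 3 = 10 triangles overall, which is what the assertions above expect.

import math
import networkx as nx

G = nx.complete_graph(5)
per_node = math.comb(4, 2)                       # 6 triangles through each vertex
assert all(t == per_node for t in nx.triangles(G).values())
assert sum(nx.triangles(G).values()) // 3 == 10  # each triangle is counted at its 3 vertices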
def triangles_distribution(G, return_dictionary=False): """This returns a distribution of the number of triangles each vertex in G is involved in, amenable to applications similar to Borges, Coppersmith, Meyer, and Priebe 2011. If return_dictionary is specified, we return a dictionary indexed by vertex name, rather than just the values (as returned by default). """ if return_dictionary: return nx.triangles(G) else: return nx.triangles(G).values()
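A hedged usage sketch for the helper above; the karate-club graph is just an example input, not from the original code. With return_dictionary=True the counts stay keyed by vertex, otherwise only the values are returned.

import networkx as nx

G = nx.karate_club_graph()
per_node = triangles_distribution(G, return_dictionary=True)   # {node: triangle count}
values_only = list(triangles_distribution(G))                   # just the counts
print(per_node[0], sorted(values_only, reverse=True)[:5])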
def compare(f1, f2): g1 = read_data(f1) g2 = read_data(f2) avg1, avg2 = 0, 0 for node in g1.nodes(): avg1 += g1.degree(node) for node in g2.nodes(): avg2 += g2.degree(node) print "Average degree......: ", avg1*1.0/(len(g1.nodes())), avg2*1.0/(len(g2.nodes())) trg1, trg2 = 0, 0 tr_list1 = list(nx.triangles(g1).values()) tr_list2 = list(nx.triangles(g2).values()) print "Triangles......: ", sum(tr_list1)/3, sum(tr_list2)/3
def clust(Graph):
    """
    Returns the graph that merges artificial loops into a single node.
    Detects the nodes included in triangles and merges them.
    Uses the external function merge_nodes.

    Parameters
    --------
    Graph : input graph with artificial loops

    Returns
    -------
    G : a graph without loops; triangles of neighboring nodes are
        replaced by a single node
    """
    G = Graph.copy()
    size = G.number_of_nodes()
    for i in G.nodes():
        neigh = nx.get_node_attributes(G, 'neig')
        index = nx.get_node_attributes(G, 'index')
        if i in G.nodes() and nx.triangles(G, i) > 0:
            n = nx.all_neighbors(G, i)
            l = [i]
            for k in n:
                if ((neigh[k] > 2) and
                        (nx.get_edge_attributes(G, 'length')[min(i, k), max(i, k)] < 2)):
                    l = np.append(l, k)
            merge_nodes(G, l, size + 1, index=index[i], neig=neigh[i])
            size += 1
        if (i == G.number_of_nodes()):
            break
    G = nx.convert_node_labels_to_integers(G, first_label=1)
    return G
def calculate_num_triangles(graph): ########## need to see this ########### count = 0 triang = nx.triangles(graph) for itr in graph.nodes(): count += triang[itr] return count
def basic_stats(self): #not decided on what level to deal with this yet: #either return error un not dealing with unconnected files, #or making it deal with unconnected files: the latter. #How about with dealing with each independently. # if not nx.is_connected(g): # conl= nx.connected_components(g) # for n in conl: # turn n into graph if it isnt # calculate ec, per, cnt # how and when to visualise the subgraphs? # iterate to next n if nx.is_connected(self.nx_graph): ec = nx.eccentricity(self.nx_graph) else: ec = 'NA - graph is not connected' per = nx.periphery(self.nx_graph) cnt = nx.center(self.nx_graph) result = { #"""fast betweenness algorithm""" 'bbc': nx.brandes_betweenness_centrality(self.nx_graph), 'tn': nx.triangles(self.nx_graph), # number of triangles 'ec': ec, 'per': per, 'cnt': cnt, 'Per': self.nx_graph.subgraph(per), 'Cnt': self.nx_graph.subgraph(cnt) } return result
def get_network_property(graph): """Returns various property of the graph. It calculates the richness coefficient, triangles and transitivity coefficient. To do so, it removes self-loops *in-place*. So, there is a possibility that the graph passed as parameter has been changed. """ remove_self_loop(graph) # If number of nodes is less than three # no point in calculating these property. if len(graph.nodes()) < 3: return ({0: 0.0}, 0, 0) try: richness = nx.rich_club_coefficient(graph) except nx.NetworkXAlgorithmError: # NetworkXAlgorithmError is raised when # it fails achieve desired swaps after # maximum number of attempts. It happened # for a really small graph. But, just to # guard against those cases. richness = nx.rich_club_coefficient(graph, False) triangle = nx.triangles(graph) transitivity = nx.transitivity(graph) return (richness, triangle, transitivity)
def get_motifs(filename): import networkx as nx from math import factorial threshold = 0 f = open(filename[:-4]+'_motifs.dat','w') for i in range(0,101): threshold = float(i)/100 G = get_threshold_matrix(filename, threshold) tri_dict = nx.triangles(G) summe = 0 for node in tri_dict: summe += tri_dict[node] N = nx.number_of_nodes(G) ratio = summe / (3. * binomialCoefficient(N,3)) transi = nx.transitivity(G) if transi > 0: triads = summe / transi ratio_triads = triads / (3 * binomialCoefficient(N,3)) else: triads = 0. ratio_triads = 0. print 'threshold: %f, number of triangles: %f, ratio: %f, triads: %f, ratio: %f' %(threshold, summe/3, ratio, triads, ratio_triads) f.write("%f\t%d\t%f\t%f\t%f\n" % (threshold, summe/3, ratio, triads, ratio_triads)) f.close() print "1:threshold 2:#triangles 3:ratio-to-potential-triangles 4:triads 5:ratio-to-potential-triads"
def wcc1(g, G): q = 0 nodes = g.node.keys() nodes_len = len(nodes) for n in nodes: tG = nx.triangles(G, n) if tG != 0: tS = float(nx.triangles(g, n)) vtG = count_trinodes(n, G) vtGS = count_trinodes(n, G, g) q += tS / tG * vtG / (nodes_len - 1 + vtGS) return q / nodes_len
def test_fast_versions_properties_threshold_graphs(self): cs='ddiiddid' G=nxt.threshold_graph(cs) assert_equal(nxt.density('ddiiddid'), nx.density(G)) assert_equal(sorted(nxt.degree_sequence(cs)), sorted(G.degree().values())) ts=nxt.triangle_sequence(cs) assert_equal(ts, list(nx.triangles(G).values())) assert_equal(sum(ts) // 3, nxt.triangles(cs)) c1=nxt.cluster_sequence(cs) c2=list(nx.clustering(G).values()) assert_almost_equal(sum([abs(c-d) for c,d in zip(c1,c2)]), 0) b1=nx.betweenness_centrality(G).values() b2=nxt.betweenness_sequence(cs) assert_true(sum([abs(c-d) for c,d in zip(b1,b2)]) < 1e-14) assert_equal(nxt.eigenvalues(cs), [0, 1, 3, 3, 5, 7, 7, 8]) # Degree Correlation assert_true(abs(nxt.degree_correlation(cs)+0.593038821954) < 1e-12) assert_equal(nxt.degree_correlation('diiiddi'), -0.8) assert_equal(nxt.degree_correlation('did'), -1.0) assert_equal(nxt.degree_correlation('ddd'), 1.0) assert_equal(nxt.eigenvalues('dddiii'), [0, 0, 0, 0, 3, 3]) assert_equal(nxt.eigenvalues('dddiiid'), [0, 1, 1, 1, 4, 4, 7])
def tpr_metric(num_nodes_s, comm_subgraph): #Dictionary where nodes are the keys and values are the number of triangles that include the node as a vertex triangles = nx.triangles(comm_subgraph) tri_count = sum(x > 0 for x in triangles.values()) tpr = tri_count / num_nodes_s return tpr
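A small usage sketch (the community below is an arbitrary subset of the karate-club graph, chosen only for illustration): the triangle participation ratio is the fraction of a community's nodes that lie in at least one triangle of the community subgraph.

import networkx as nx

G = nx.karate_club_graph()
community = [0, 1, 2, 3, 7, 13]              # hypothetical community
subgraph = G.subgraph(community)
print(tpr_metric(len(community), subgraph))  # fraction of members that close a triangle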
def analyze_graph(G): print nx.info(G) degree_freq = nx.degree_histogram(G) diameter = nx.diameter(G) print "Diameter: {0}".format(diameter) triangles = nx.triangles(G) triangles_values = sorted(triangles.values(), reverse=True) print "Triangles: {0}".format(triangles_values)
def triangleRatio(g, center): triangleCoef = nx.triangles(g, nodes=[center])[center] deg = len(g.neighbors(center)) maxTriangles = float(deg*(deg-1))/2.0 return float(triangleCoef)/maxTriangles
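One observation, not stated in the original code: dividing a node's triangle count by deg*(deg-1)/2 yields exactly its local clustering coefficient, so the helper should agree with nx.clustering (on NetworkX 2.x, where g.neighbors returns an iterator, the helper itself would need len(list(g.neighbors(center)))). A minimal check of that identity on an example graph:

import networkx as nx

g = nx.karate_club_graph()
center = 0
deg = g.degree(center)
ratio = nx.triangles(g, center) / (deg * (deg - 1) / 2.0)
assert abs(ratio - nx.clustering(g, center)) < 1e-12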
def init(self, graph, initPart): """Initialize the SCDMetaData of a graph with every node in one community""" self.T = nx.triangles(graph) self.com2nodes = dict() self.node2coms = dict() for comm, nodes in initPart.items(): self.com2nodes[comm] = nodes for node in nodes: self.node2coms[node] = set([comm])
def compareAvgNumberOfTriangles(masterGraph,wordGraph,worksheet,row): masterTriangles = nx.triangles(masterGraph) wordTriangles = nx.triangles(wordGraph) numberOfTrianglesMaster = list(masterTriangles.values()) numberOfTrianglesWord = list(wordTriangles.values()) averageNumberOfTrianglesMaster = (np.mean(numberOfTrianglesMaster)/3) averageNumberOfTrianglesWord = (np.mean(numberOfTrianglesWord)/3) #worksheet.write(row,1,averageNumberOfTrianglesMaster) #worksheet.write(row,2,averageNumberOfTrianglesWord) result = False if(averageNumberOfTrianglesMaster >= averageNumberOfTrianglesWord): result = True #worksheet.write(row,3,result) if result == True: return 1 else: return -1
def getTriangles(UDG): global triangles triangles = nx.triangles(UDG) plt.hist(triangles.values(),bins = 50,log = True,facecolor='green', alpha=0.75) plt.xlabel('Number Of Triangles', fontsize = 15) plt.ylabel('Frequency', fontsize = 15) plt.title(r'$\mathrm{Triangles\ Histogram}\ $',fontsize = 15) plt.savefig("trianglesHistogram.png", format="PNG") plt.grid(True) plt.axis([-200,4500,0.1,10000]) plt.show()
def get_relevant_edges(self):
    """
    A filter over edges: yields only those edges whose endpoints each
    participate in at least as many triangles as a vertex of a completely
    connected graph of size "lower_bound" would.
    """
    if self.lower_bound < 3:
        raise AttributeError("""Lower bound needs to be >=3 nodes as this
            can only find triangles and above""")
    lowest_bound = self.lower_bound - 1
    should_have_x_triangles = sum(xrange(lowest_bound))
    for x, y in self.graph.edges():
        print("number of triangles: ", nx.triangles(self.graph, x))
        print("number of triangles: ", nx.triangles(self.graph, y))
        if nx.triangles(self.graph, y) >= should_have_x_triangles \
                and nx.triangles(self.graph, x) >= should_have_x_triangles:
            try:
                [x, y] = sorted([x, y])
            except:
                pass
            yield x, y
def cal_triangles(fn1,fn2): edges=prep.read_edges(fn2) sth={edge:0. for edge in edges} G=nx.Graph() edges_all=prep.read_edges(fn1) G.add_edges_from(edges_all) for x in sth: n1=nx.triangles(G,x[0]) n2=nx.triangles(G,x[1]) n3=max(n1,n2) n4=min(n1,n2) sth[x]=float(n4)/(n3+1) ## sth[x]=(n1+1)*(n2+1) ## sth[x]=n1+n2 return sth
def graphAnalysis(graph, top_number, save_file_path): """ Do the essential analysis to the final combined graph """ with io.open(save_file_path, 'w') as save_file: # centrality # degree centrality deg_central = nx.degree_centrality(graph) deg_central_sort = sorted(deg_central.items(), key = lambda x: x[1], reverse = True) top_deg_central_sort = deg_central_sort[:top_number] save_file.write('top %d degree centrality items,' % top_number) save_file.write(','.join('%s %s' % x for x in top_deg_central_sort)) # clustering # number of triangles: triangles() is not defined for directed graphs triangle_num = nx.triangles(graph) triangle_num_sort = sorted(triangle_num.items(), key = lambda x: x[1], reverse = True) top_triangle_num_sort = triangle_num_sort[:top_number] save_file.write('\ntop %d number of triangles including a node as one vertex,' % top_number) save_file.write(','.join('%s %s' % x for x in top_triangle_num_sort)) # clustering coefficient of node in the graph cluster_coefficient = nx.clustering(graph) cluster_coefficient_sort = sorted(cluster_coefficient.items(), key = lambda x: x[1], reverse = True) top_cluster_coefficient_sort = cluster_coefficient_sort[:top_number] save_file.write('\ntop %d clustering coefficient items,' % top_number) save_file.write(','.join('%s %s' % x for x in top_cluster_coefficient_sort)) # transitivity of the graph triangle_transitivity = nx.transitivity(graph) save_file.write('\ntransitivity of the graph,%f' % triangle_transitivity) # average clustering coefficient of the graph avg_cluster = nx.average_clustering(graph) save_file.write('\naverage clustering coefficient of the graph,%f' % avg_cluster) # clique # size of the largest clique in the graph size_largest_clique = nx.graph_clique_number(graph) save_file.write('\nsize of the largest clique in the graph,%d' % size_largest_clique) # all the cliques in the graph all_clique = nx.find_cliques(graph) # a generator list_all_clique = list(all_clique) list_all_clique_sort = sorted(list_all_clique, key = lambda x: len(x), reverse = True) list_all_clique_sort = [' '.join(clique) for clique in list_all_clique_sort] # print list_all_clique_sort save_file.write('\ncliques,') save_file.write(','.join(x for x in list_all_clique_sort))
def wcc(subgs, G): Q = 0 for g in subgs: q = 0 nodes = g.node.keys() nodes_len = len(nodes) for n in nodes: tG = nx.triangles(G, n) if tG != 0: tS = float(nx.triangles(g, n)) vtG = count_trinodes(n, G) vtGS = count_trinodes(n, G, g) q += tS / tG * vtG / (nodes_len - 1 + vtGS) # Q += q / nodes_len Q += q # return Q / len(subgs) return Q / G.number_of_nodes()
def could_be_isomorphic(G1,G2): """Returns False if graphs are definitely not isomorphic. True does NOT guarantee isomorphism. Parameters ---------- G1, G2 : NetworkX graph instances The two graphs G1 and G2 must be the same type. Notes ----- Checks for matching degree, triangle, and number of cliques sequences. """ # Check global properties if G1.order() != G2.order(): return False # Check local properties d1=G1.degree(with_labels=True) t1=networkx.triangles(G1,with_labels=True) c1=networkx.number_of_cliques(G1,with_labels=True) props1=[ [d1[v], t1[v], c1[v]] for v in d1 ] props1.sort() d2=G2.degree(with_labels=True) t2=networkx.triangles(G2,with_labels=True) c2=networkx.number_of_cliques(G2,with_labels=True) props2=[ [d2[v], t2[v], c2[v]] for v in d2 ] props2.sort() if props1 != props2: # print props1 # print props2 return False # OK... return True
def analyzeGraph(self, G): """ The structure of G can be analyzed using various graph-theoretic functions such as:""" ################# ## Centrality ################# # The degree centrality values are normalized by dividing by the maximum possible # degree in a simple graph n-1 where n is the number of nodes in G. degreeC = nx.degree_centrality(G) print "\ndegreeC" print degreeC # Closeness centrality of a node \(u\) is the reciprocal of the sum of the shortest path distances from # \(u\) to all \(n-1\) other nodes. Since the sum of distances depends on the number of nodes in the graph, # closeness is normalized by the sum of minimum possible distances \(n-1\). closenessC = nx.closeness_centrality(G, u=None, distance="d_euclid", normalized=True) print "\nclosenessC" print closenessC # Betweenness centrality of an edge \(e\) # is the sum of the fraction of all-pairs shortest paths that pass through \(e\): betweennessC = nx.edge_betweenness_centrality(G, normalized=True) print "\nbetweennessC" print betweennessC ################# ## Clustering and Components ################# # Finds the number of triangles that include a node as one vertex. print "\nno_triangles" no_triangles = nx.triangles(G, nodes=None) print no_triangles print "\nno_con_comp" no_con_comp = nx.number_connected_components(G) print no_con_comp # find the mode modegreeC = Counter(degreeC).most_common(1) moclosenessC = Counter(closenessC).most_common(1) mobetweennessC = Counter(betweennessC).most_common(1) mono_triangles = Counter(no_triangles).most_common(1) print modegreeC print moclosenessC print mobetweennessC print mono_triangles return modegreeC, moclosenessC, mobetweennessC, mono_triangles, no_con_comp
def GraphAnalysis(Anomalies_Subgraph):
    # Generate an output where the indexes are the Anomalies phone numbers
    # and the columns are the graph metrics
    pagerank_matrix = nx.pagerank(Anomalies_Subgraph, max_iter=200)
    pagerank_matrix_df = pd.DataFrame.from_dict(pagerank_matrix, orient='index')
    pagerank_matrix_df.columns = ["Page Rank"]
    trianglecount_matrix = nx.triangles(Anomalies_Subgraph)
    trianglecount_matrix_df = pd.DataFrame.from_dict(trianglecount_matrix, orient='index')
    trianglecount_matrix_df.columns = ["Triangle Count"]
    trianglecount_matrix_df.describe()
    body = pd.merge(pagerank_matrix_df, trianglecount_matrix_df,
                    right_index=True, left_index=True)
    body.head()
    body = body.reset_index()
    body.columns = ["Phone Number", "Page Rank", "Triangle Count"]
    return body
def scanStatisticOnTriangles(G, vertexHistories):
    nowMeanByVertex = {}
    maxScanStat = -1
    highestDeltaNode = False
    for node in G.nodes():
        tempNodeList = G.neighbors(node)
        tempNodeList.append(node)
        inducedSubgraph = G.subgraph(tempNodeList)
        # nx.triangles returns a dict keyed by node, so sum its values
        # (summing the dict itself would add up the node labels instead).
        inducedSubgraphSize = sum(nx.triangles(inducedSubgraph).values()) / 3
        thisNodeScanStat = (inducedSubgraphSize -
                            vertexHistories[node])  # Only does Tau=1 for now
        nowMeanByVertex[node] = inducedSubgraphSize
        if thisNodeScanStat > maxScanStat:
            maxScanStat = thisNodeScanStat
            highestDeltaNode = node
    return (maxScanStat, highestDeltaNode, nowMeanByVertex)
        min_node_degree = tmp_degree
    elif (tmp_degree > max_node_degree):  # Compare with the running maximum
        max_node_degree = tmp_degree
    elif (tmp_degree < min_node_degree):  # Compare with the running minimum
        min_node_degree = tmp_degree

# Compute the average degree of the nodes of graph G
print("Calculating average degree..")
average_degree = node_degree_sum / tot_nodes

tot_triangles = 0
print("Calculating total triangles..")
for node in G.nodes():
    tot_triangles += nx.triangles(G, node)

print("Calculating assortativity coefficient.. ")
degree_assortativity_coefficient = nx.degree_assortativity_coefficient(G)

print("Calculating global clustering coefficient (Transitivity).. ")
global_clustering_coefficient = nx.transitivity(G)

print("Calculating local clustering coefficient.. ")
local_clustering_coefficient = {}
for node in G.nodes():
    local_clustering_coefficient[node] = nx.clustering(G, node)

print("Calculating average clustering coefficient.. ")
avg_clustering_coefficient = nx.average_clustering(G)
G.add_edges_from(edges_time[idx]) graphs.append(G) print(G.number_of_nodes()) #Calculate all stuff betweenness_centralitys = [] average_cc = [] degree_centrality = [] triangles = [] average_degree = [] for g in graphs: betweenness_centralitys.append(nx.betweenness_centrality(g)) average_cc.append(nx.average_clustering(g)) degree_centrality.append(nx.degree_centrality(g)) triangles.append(nx.triangles(g)) #average degree degrees = g.degree() sum_of_degree = 0 for n, d in degrees: sum_of_degree += d average_degree.append(sum_of_degree / g.number_of_nodes()) def max_val(dict): val = list(dict.values()) return max(val) betweenness_centralitys_max = []
def get_ais_stats(Glist, attribute, smax=True): import matrices_creation import fraudar as fr print('...ais stats for:', attribute) Glist = Glist[1:] dataset = 'ico' infodict = pickle.load( open('FILE WITH ALL ATTRIBUTES, EG ALL HASHTAGS OR ALL URLS', 'rb')) botdict = infodict for G in Glist: text = open(dataset + '_' + G.name + '_' + attribute + "_ais_stats.txt", 'w', encoding='utf-8') allAttributes = [] for n, attr in G.nodes(data=True): if attribute in attr: allAttributes.extend(attr[attribute]) for a in set(allAttributes): try: subGraphList = [ x for x, y in G.nodes(data=True) if a in y[attribute] ] subgraph = G.subgraph(subGraphList) try: coupled = matrices_creation.matrix_calculation(subgraph) coupled = matrices_creation.coupled_matrix_using_weights( subgraph, attribute) # tf = nt.produce_idf_dict(h)[1] indicesList = [n for n in range(coupled.shape[0])] fraudarScore = (fr.get_fraudar_score(coupled, indicesList)) except (UnicodeEncodeError, MemoryError): fraudarScore = 0.0 print('fraudar error', a) volume = len(subgraph.nodes()) mass = len(subgraph.edges()) if volume == 2 and mass > volume: print(a, volume, mass) if mass > 0: if isinstance(nx.triangles(subgraph), dict) > 0: triangles = sum(nx.triangles(subgraph).values()) / 3 else: triangles = nx.triangles(subgraph) else: triangles = 0 componentList = [] for c in nx.connected_components(subgraph): componentList.append(len(c)) isolated = len(componentList) Gcc = sorted(nx.connected_components(subgraph), key=len, reverse=True) try: G0 = len(Gcc[0]) except IndexError: G0 = 0 if subgraph.number_of_nodes() > 0 or subgraph.number_of_edges( ) > 0: try: A = nx.adjacency_matrix(subgraph) B = A.todense() U, s, V = linalg.svd(B) sMax = np.amax(s) except: sMax = 0 print('smax error', a) else: sMax = 0 text.write( str(volume) + "," + str(mass) + "," + str(triangles) + "," + str(G0) + "," + str(sMax) + "," + str(infodict[a]) + "," + str(botdict[a]) + "," + str(fraudarScore) + "," + str(a) + "\n") except UnicodeEncodeError: print('Unicode Error') text.close()
def triangles(NC, CA): # This function returns the number of triangles in the design G = createGraph(NC, CA) return sum(nx.triangles(G).values()) / 3
def numTriangles2(): G = nx.Graph() G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4)]) num = sum(nx.triangles(G).values())/3 print(nx.triangles(G)) print("Number of triangles: " + str(num))
nx.draw(G, with_labels=True) cliques = list(nx.enumerate_all_cliques(G)) print(cliques) for i in range(1, len(max(cliques, key=len)) + 1): print(i, "clique:", [x for x in cliques if len(x) == i]) # In[37]: node_cliques = nx.cliques_containing_node(G, nodes=1) print(node_cliques) # ### Triangle, Transitivity, Clustering Coefficient # In[38]: print(nx.triangles(G)) print(nx.transitivity(G)) print(nx.clustering(G)) # ## Comunity # https://github.com/taynaud/python-louvain # In[39]: import community G = nx.Graph() G.add_edges_from([(1, 5), (1, 2), (2, 4), (3, 4), (3, 5), (4, 5)]) partition = community.best_partition(G) print(partition)
def get_triangle_count(self, graph): triangles = nx.triangles(graph).values() res = 0 for t in triangles: res += t return int(res / 3)
"########## Prima dell'anonimizzazione ###########\n\n") #af.degree_distribution(G_init) #print("Numero di archi: {}".format(len(nx.edges(G_init)))) scrivi_risultato(name, euristic, "Numero di archi: {}\n\n".format(len(nx.edges(G_init)))) scrivi_risultato(name, euristic, "Numero di nodi: {}\n\n".format(len(G_init))) #print("Clustering coefficient: {}".format(nx.average_clustering(G_init))) scrivi_risultato( name, euristic, "Clustering coefficient: {}\n\n".format(nx.average_clustering(G_init))) triangle_vertexes = [n for n in nx.triangles(G_init).values()] triangle_vertexes = reduce(lambda x, y: x + y, triangle_vertexes) #print("Number of triangles: {}".format(triangle_vertexes/3)) scrivi_risultato(name, euristic, "Number of triangles: {}\n\n".format(triangle_vertexes / 3)) originalPrank = nx.pagerank(G_init) originalPrank = list(originalPrank.values()) #print("Diameter: {}".format(nx.diameter(G_init))) x1, y1 = calc_xy(G_init) XY = [(x1, y1)] every_ser = XY.copy() every_lab = labels.copy() distances = defaultdict(list)
def NetworkAnalysis(G, filename='highRatingResults.txt'): #standard metrics local metrics nbr_nodes = nx.number_of_nodes(G) nbr_edges = nx.number_of_edges(G) nbr_components = nx.number_connected_components(G) F = open(filename, 'w') # t1 = "Number of nodes:" + str(nbr_nodes) # t2 = "Number of edges:" + str(nbr_edges) # t3 = "Number of connected components:" + str(nbr_components) F.write("Number of nodes:" + str(nbr_nodes) + "\n") F.write("Number of edges:" + str(nbr_edges) + "\n") F.write("Number of connected components:" + str(nbr_components) + "\n") # F.close() #betweeness betweenList = nx.betweenness_centrality(G) #print("The list of betweenness centrality is", str(betweenList), "\n") F.write("The list of betweenness centrality is" + str(betweenList) + "\n") #all the items have less than 1 betweenness centrality which indicate that there is no #item that lie inbetween the connection between two items. #degree degreeCentrality = nx.degree_centrality(G) F.write("The degrees of centrality is " + str(degreeCentrality) + "\n") #clustering coefficient #clustering coefficient for each nodes F.write("The clustering coefficients are " + str(nx.clustering(G)) + "\n") partition = community_louvain.best_partition(G) F.write("The community modularity is " + str(community_louvain.modularity(partition, G)) + "\n") #which suggest that there isn't a strong community #global network metrics (metric to explain whole network not just a part) #diameter - the max of shortest distances between nodes F.write("The diameter is " + str(nx.diameter(G)) + "\n") #density F.write("The density is " + str(nx.density(G)) + "\n") #not particularly low nor high in density #triangles F.write("The triangle is " + str(nx.triangles(G)) + "\n") #average clustering coefficient for the graph avgclu = nx.average_clustering(G) F.write("The average clustering is " + str(avgclu) + "\n") #average degree centrality tot = [] for food in degreeCentrality: item = degreeCentrality[food] tot.append(item) avgdeg = np.average(tot) F.write("The average degree centrality is " + str(avgdeg) + "\n") #average betweenness centrality l = [] for f in betweenList: item = betweenList[f] l.append(item) avgB = np.average(l) F.write("The average betweenness centrality is " + str(avgB) + "\n") F.close()
def main(): graphs=[] i=1 histnames=['density', 'Eulerian', 'number of edges', 'number of nodes', 'Chordal'] while i < len(sys.argv): if sys.argv[i] == "--hops": hops = int(sys.argv[i+1]) i=i+2 elif sys.argv[i] == "--maxDist": minDist = sys.argv[i+1] i=i+2 elif sys.argv[i] == "--structures": structStrig = sys.argv[i+1] structure = structStrig.split(',') i=i+2 else: print("Invalid argument: "+sys.argv[i]) exit(-1) i=i+1 for s in structure : padStruct.append("\'"+s+"\'") structStrig=",".join(padStruct) cursr.execute("""SELECT distinct top(100) FromFacilityKey from Cables where FromFacilityKey is not null and FromFacilityType in (\'MH\')""") for row in cursr.fetchall(): graphlet=networkx.Graph() graphlet.add_node(row.FromFacilityKey) addNeighbours(hops, row.FromFacilityKey, graphlet) filledGraph=graphlet #print(filledGraph is not None) if filledGraph is not None and networkx.number_of_edges(filledGraph)>0: L = networkx.normalized_laplacian_matrix(filledGraph) e = numpy.linalg.eigvals(L.A) ms_dict=networkx.get_edge_attributes(filledGraph,'MainServiceIndicator') pn_dict=networkx.get_edge_attributes(filledGraph,'PhaseNeutralIndicator') uo_dict=networkx.get_edge_attributes(filledGraph,'UndergroundOverheadIndicator') cm_dict=networkx.get_edge_attributes(filledGraph,'ConductorMaterial') im_dict=networkx.get_edge_attributes(filledGraph,'InsulationMaterial') bd_dict=networkx.get_edge_attributes(filledGraph,'Build_Date') rd_dict=networkx.get_edge_attributes(filledGraph,'Hop') ty_dict=networkx.get_edge_attributes(filledGraph,'Type') bd_list=bd_dict.values() bd_list.sort() c=collections.Counter(ms_dict.values()) mains=c.get('M') service=c.get('S') c=collections.Counter(pn_dict.values()) phase=c.get('P') neutral=c.get('N') c=collections.Counter(uo_dict.values()) under=c.get('U') over=c.get('O') c=collections.Counter(im_dict.values()) dom_ins=c.most_common(1)[0][0] c=collections.Counter(cm_dict.values()) dom_cond=c.most_common(1)[0][0] sb=[0,0] mh=[0,0] for k in rd_dict.keys(): if ty_dict.get(k)=='MH': mh[rd_dict.get(k)-1]+=1 elif ty_dict.get(k)=='SB': sb[rd_dict.get(k)-1]+=1 features={ 'GraphInfo': networkx.info(filledGraph), 'density': networkx.density(filledGraph), 'histogram': networkx.degree_histogram(filledGraph), 'average degree':networkx.average_neighbor_degree(filledGraph), 'number of edges':networkx.number_of_edges(filledGraph), 'number of nodes':networkx.number_of_nodes(filledGraph), 'strong connectedness':networkx.is_strongly_connected(filledGraph), #'clustering coeff': cc, 'eigen values': bijectiveMapper(e.tolist()), 'eigenkey': bijectiveMapper(e.tolist()), 'Eulerian': networkx.is_eulerian(filledGraph), 'Chordal': networkx.is_chordal(filledGraph), 'No. of Triangles': networkx.triangles(filledGraph), 'Earliest_Date':bd_list[0], 'Total_mains':mains, 'Total_neutral': neutral, 'Total_phase':phase, 'Total_service': service, 'Total_underground':under, 'Total_over':over, 'Dominant_Insulator':dom_ins, 'Dominant_Conductor':dom_cond, 'Service Box Hop 1': sb[0], 'Service Box Hop 2': sb[1], 'Manhole Hop 1': mh[0], 'Manhole Hop 2': mh[1] } graphs.append(features) with open('features2.json', 'w') as f: json.dump(graphs, f) print len(graphs) #collating full features for s in histnames: print s t_feature=[o.get(s) for o in graphs] plt.figure() plt.hist(t_feature) plt.title('HISTOGRAM OF '+s) plt.xlabel(s) plt.ylabel('no. of structures') plt.savefig('.\\'+s+'_hist.jpg') plt.close() cnxn.close()
kclique_clusters = {} for i in range(2, nx.graph_clique_number(graph) + 1): clique = list(nx.community.k_clique_communities(graph, i)) kclique_clusters[i] = list(clique[0]) # get Kernighan bisection communities kernighan_bisection = nx.community.kernighan_lin_bisection(graph) # get communities using the Girvan–Newman method. comp = nx.community.girvan_newman(graph) girvan_newman_clusters = tuple(sorted(c) for c in next(comp)) # get square clusters square_clusters = nx.square_clustering(graph) # get triangles number per node nodes_triangles_nb = nx.triangles(graph) # get nodes pagerank nodes_pagerank = nx.pagerank(graph) # save graph informations in pickle files pickle.dump(girvan_newman_clusters, open('girvan_newman_clusters', 'wb')) pickle.dump(clustering_nodes_coef, open('clustering_nodes_coef', 'wb')) pickle.dump(kernighan_bisection, open('kernighan_bisection', 'wb')) pickle.dump(nodes_triangles_nb, open('nodes_triangles_nb', 'wb')) pickle.dump(louvain_clusters, open('louvain_clusters', 'wb')) pickle.dump(kclique_clusters, open('kclique_clusters', 'wb')) pickle.dump(square_clusters, open('square_clusters', 'wb')) pickle.dump(nodes_pagerank, open('nodes_pagerank', 'wb'))
print("How many times a vertix belong to a triangle?") for i in range(len(G)): print("The vertice: " + str(i) + " belong to " + str(vertices[i]) + " triangles") print("Total number of triangles: ", count) # print("\n<----------GeeksForGeeks trace method---------->") # trace.V = 100 # number of nodes # start = time.process_time() # count = trace.triangleInGraph(adjacency) # print("Execution time: ", (time.process_time() - start)) # print("Total number of triangles: ", count) print("\n<----------NetworkX built-in function---------->") start = time.process_time() numTriangles = sum(nx.triangles(G).values()) / 3 print("Execution time: ", (time.process_time() - start)) print("Total number of triangles: ", numTriangles) print("How many times a vertix belong to a triangle?") print(nx.triangles(G)) print("\n<----------Sparse matrix schemes---------->") print("Adjacency:") print(repr(adjacency)) print("COO:") coo = nx.to_scipy_sparse_matrix(G, format='coo') print(coo) print("CSR:") csr = nx.to_scipy_sparse_matrix(G, format='csr') print(csr) print("CSC:")
nodelist = hosts_list + node_l ''' Uncomment to load the precomputed embeding as features # loadading embeding print('Loading embeding') X = np.load('X_all.npy') node_emb = X[:,0].astype('int') node_emb = node_emb.astype('str') X = X[:,1:] ''' ## Computing features print('Computing pagerank') pagerank = nx.pagerank(G) print('Computing triangles') triangles = nx.triangles(G) print('Computing degree centrality') deg_centrality = nx.degree_centrality(G) print('Computing Core number') core_number = nx.core_number(G) print('Computing color number') color_number = nx.algorithms.coloring.greedy_color(G) # Computing feature matrix features = [] features_dict = dict() for i in nodelist: features.append([ pagerank[i], triangles[i], deg_centrality[i], core_number[i], color_number[i] ])
import networkx as nx G = nx.read_edgelist("edges.txt", delimiter=",") triangles = sum(list(nx.triangles(G).values())) print("number of triangles\t" + str(triangles))
def convote_graph(): print '--CONSTRUCTION OF THE CONVOTE GRAPH--' plattScaling = PlattScaling() plattScaling.fit_convote() print 'Platt Scaling initialized' with open( os.path.join('..', 'data', 'convote', 'edges_individual_document.v1.1.csv'), 'rb') as infile: debate_to_speaker = {} for line in infile: line = line.split(',') debate, speaker, _, vote = line[0].split('_') if 'N' in vote: vote = False else: vote = True if not debate in debate_to_speaker: debate_to_speaker[debate] = {} if not speaker in debate_to_speaker[debate]: debate_to_speaker[debate][speaker] = [ vote, [plattScaling.predict_proba([float(line[2])])[0][1]] ] else: debate_to_speaker[debate][speaker][1].append( plattScaling.predict_proba([float(line[2])])[0][1]) for debate in debate_to_speaker.keys(): for speaker in debate_to_speaker[debate].keys(): debate_to_speaker[debate][speaker][1] = np.mean( debate_to_speaker[debate][speaker][1]) #print debate_to_speaker['052']['400077'] G = nx.Graph() for debate in debate_to_speaker.keys(): speakers = list(debate_to_speaker[debate].keys()) for i in range(len(speakers)): for j in range(i + 1, len(speakers)): agree = debate_to_speaker[debate][speakers[i]][ 0] == debate_to_speaker[debate][speakers[j]][0] proba_agreement = debate_to_speaker[debate][speakers[i]][ 1] * debate_to_speaker[debate][speakers[j]][1] + ( 1 - debate_to_speaker[debate][speakers[i]][1]) * ( 1 - debate_to_speaker[debate][speakers[j]][1]) if G.has_edge(speakers[i], speakers[j]): G[speakers[i]][speakers[j]]['vote_agree'].append(agree) G[speakers[i]][speakers[j]]['sentiment_agree'].append( proba_agreement) else: G.add_edge(speakers[i], speakers[j], vote_agree=[agree], sentiment_agree=[proba_agreement]) positive_edges = 0 for edge in G.edges(): G[edge[0]][edge[1]]['vote_agree'] = int( sum(G[edge[0]][edge[1]]['vote_agree']) >= (len(G[edge[0]][edge[1]]['vote_agree']) + 1) / 2) G[edge[0]][edge[1]]['sentiment_agree'] = np.mean( G[edge[0]][edge[1]]['sentiment_agree']) if G[edge[0]][edge[1]]['vote_agree'] == True: positive_edges = positive_edges + 1 number_triangles = 0 for node in G.nodes(): number_triangles = number_triangles + nx.triangles(G, node) number_triangles = number_triangles / 3 print "Created the graph with {} edges ( {:.0f}% positive ) and {} triangles".format( G.number_of_edges(), float(positive_edges) / G.number_of_edges() * 100, number_triangles) nx.write_gpickle(G, os.path.join('..', 'data', 'convote', 'graph.pkl')) print '--GRAPH STORED IN DATA--'
def getTriangles(self, data, header): triangles = nx.triangles(self.graph, nodes=[n for n in data]) header += ['triangles'] for n in data: data[n].append(triangles[n]) return data, header
def numTriangles(G): #each triangle is counted three times, once at each node num = sum(nx.triangles(G).values())/3 print("Number of triangles: " + str(num))
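A worked example (illustrative, not from the original file): in the graph with edges (1, 2), (1, 3), (2, 3), (3, 4) the single triangle 1-2-3 is reported once at each of its three vertices, so the per-node sum is 3 and the division gives 1.

import networkx as nx

G = nx.Graph([(1, 2), (1, 3), (2, 3), (3, 4)])
print(nx.triangles(G))   # {1: 1, 2: 1, 3: 1, 4: 0}
numTriangles(G)          # prints "Number of triangles: 1.0"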
def evaluate_graph(thisReplicate, invariant): """ Take the NetworkX graph in thisReplicate and evaluate it using the invariant specified. Return said value. """ doAllInvariants = False if invariant == -1: doAllInvariants = True print "Doing all invariants at once, returning an array of them instead of a single value..." thisReplicateInvariantValue = [] ######## # SIZE # ######## if (invariant == 1 or doAllInvariants): size = thisReplicate.number_of_edges() if doAllInvariants: thisReplicateInvariantValue.append(size) else: thisReplicateInvariantValue = size ############## # MAX DEGREE # ############## if (invariant == 2 or doAllInvariants): degArr = [] maxDegree = -1 replicateDegree = thisReplicate.degree() for entry in replicateDegree: if maxDegree < entry: maxDegree = entry degArr.append(replicateDegree[entry]) writeArrayToFile( degArr, os.path.join("bench", str(thisReplicate.number_of_nodes()), "degArr.txt")) degArr = np.array(degArr) np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "degArr.npy"), degArr) if doAllInvariants: thisReplicateInvariantValue.append(maxDegree) else: thisReplicateInvariantValue = maxDegree ################## # EIGENVALUE MAD # ################## if (invariant == 4 or doAllInvariants): thisEigenvalues, thisEigenvectors = linalg.eig( nx.adj_matrix(thisReplicate)) thisEigenvalues.sort() if len(thisEigenvalues) > 0: MADe = float(thisEigenvalues[-1]) np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "MAD.npy"), MADe) else: MADe = 0 if doAllInvariants: thisReplicateInvariantValue.append(MADe) else: thisReplicateInvariantValue = MADe ################## # SCAN STATISTIC # ################## if (invariant == 5 or doAllInvariants): maxScanStat = -1 scanStatArr = [] for node in thisReplicate.nodes(): tempNodeList = thisReplicate.neighbors(node) tempNodeList.append( node ) #Append the central node to the neighborhood before inducing the subgraph, since it is left out by neighbors(.) 
inducedSubgraph = thisReplicate.subgraph(tempNodeList) thisNodeScanStat = inducedSubgraph.number_of_edges( ) #The number of edges in the 1-hop neighborhood of node scanStatArr.append(thisNodeScanStat) if thisNodeScanStat > maxScanStat: maxScanStat = thisNodeScanStat writeArrayToFile( scanStatArr, os.path.join("bench", str(thisReplicate.number_of_nodes()), "scanStatArr.txt")) scanStatArr = np.array(scanStatArr) np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "scanStatArr.npy"), scanStatArr) if doAllInvariants: thisReplicateInvariantValue.append(maxScanStat) else: thisReplicateInvariantValue = maxScanStat ################# # NUM TRIANGLES # ################# if (invariant == 6 or doAllInvariants): triArr = [] triangleList = nx.triangles( thisReplicate ) #This returns a list with the number of triangles each node participates in for vertex in (triangleList): triArr.append(int(round(triangleList[vertex] / 3.0))) triangles = sum(triangleList.values()) / 3 writeArrayToFile( triArr, os.path.join("bench", str(thisReplicate.number_of_nodes()), "triArr.txt")) triArr = np.array(triArr) np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "triArr.npy"), triArr) if doAllInvariants: thisReplicateInvariantValue.append(triangles) else: thisReplicateInvariantValue = triangles ########################## # Clustering Coefficient # ########################## if (invariant == 8 or doAllInvariants): try: cc = nx.average_clustering(thisReplicate) ccArr = nx.clustering(thisReplicate) writeArrayToFile( ccArr.values(), os.path.join("bench", str(thisReplicate.number_of_nodes()), "ccArr.txt")) ccArr = np.array(ccArr) ccArr = ccArr[()].values() np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "ccArr.npy"), ccArr) except ZeroDivisionError: #This only occurs with degenerate Graphs --GAC cc = -999 if doAllInvariants: thisReplicateInvariantValue.append(cc) else: thisReplicateInvariantValue = cc ####################### # Average Path Length # ####################### if (invariant == 9 or doAllInvariants): apl = -1 * nx.average_shortest_path_length( thisReplicate ) #Since smaller APL is in favor of HA over H0, we use -1 * APL instead of APL. 
--GAC pairsArr = length = nx.all_pairs_shortest_path(thisReplicate) aplArr = [] for vert in thisReplicate: total = 0 for neigh in range(len(pairsArr[vert])): total += (len(pairsArr[vert][neigh]) - 1) aplArr.append(ceil(total / float(len(pairsArr[vert])))) writeArrayToFile( aplArr, os.path.join("bench", str(thisReplicate.number_of_nodes()), "aplArr.txt")) aplArr = np.array(aplArr) np.save( os.path.join("bench", str(thisReplicate.number_of_nodes()), "aplArr.npy"), aplArr) if doAllInvariants: thisReplicateInvariantValue.append(apl) else: thisReplicateInvariantValue = apl ########## # DEGREE # ########## if (doAllInvariants): thisReplicateInvariantValue.append(thisReplicate.number_of_nodes()) ############## # GREEDY MAD # ############## # We put this last because it actually deconstructs the Graph if (invariant == 3 or doAllInvariants): maxAverageDegree = -1 nodeList = thisReplicate.nodes() while nodeList: #While there's something left degreeList = thisReplicate.degree(nodeList) smallestDegree = len(nodeList) + 3 smallestID = -1 for nodeID in range( 0, len(degreeList )): #Search for the node with the smallest degree if thisReplicate.degree(nodeList[nodeID]) < smallestDegree: smallestID = nodeList[nodeID] smallestDegree = thisReplicate.degree(nodeList[nodeID]) #Calculate the average degree sumDegree = 0.0 for degree in degreeList: sumDegree += degree if sumDegree > 0: thisAverageDegree = sumDegree / float(len(nodeList)) else: thisAverageDegree = 0 #If this average degree is larger than any we've seen previously, store it if thisAverageDegree > maxAverageDegree: maxAverageDegree = thisAverageDegree #Remove the vertex with the smallest degree #**** DISA EDIT***** thisReplicate.delete_node(smallestID) thisReplicate.remove_node(smallestID) nodeList.remove(smallestID) if doAllInvariants: thisReplicateInvariantValue.append(maxAverageDegree) else: thisReplicateInvariantValue = maxAverageDegree return thisReplicateInvariantValue
def get_graph(Mat_D, Threshold, percentageConnections=False, complet=False): import scipy.io as sio import numpy as np import networkx as nx import pandas as pd import os Data = sio.loadmat(Mat_D) matX = Data['Correlation'] #[:tamn,:tamn] labels = Data['labels'] print(np.shape(matX)) print(np.shape(labels)) print(np.min(matX), np.max(matX)) if percentageConnections: if percentageConnections > 0 and percentageConnections < 1: for i in range(-100, 100): per = np.sum(matX > i / 100.) / np.size(matX) if per <= Threshold: Threshold = i / 100. break print(Threshold) else: print('The coefficient is outside rank') #Lista de conexion del grafo row, col = np.shape(matX) e = [] for i in range(1, row): for j in range(i): if complet: e.append((labels[i], labels[j], matX[i, j])) else: if matX[i, j] > Threshold: e.append((labels[i], labels[j], matX[i, j])) print(np.shape(e)[0], int(((row - 1) * row) / 2)) #Generar grafo G = nx.Graph() G.add_weighted_edges_from(e) labelNew = list(G.nodes) #Metricas por grafo (ponderados) Dpc = nx.degree_pearson_correlation_coefficient(G, weight='weight') cluster = nx.average_clustering(G, weight='weight') #No ponderados estra = nx.estrada_index(G) tnsity = nx.transitivity(G) conNo = nx.average_node_connectivity(G) ac = nx.degree_assortativity_coefficient(G) #Metricas por nodo tam = 15 BoolCenV = False BoolLoad = False alpha = 0.1 beta = 1.0 katxCN = nx.katz_centrality_numpy(G, alpha=alpha, beta=beta, weight='weight') bcen = nx.betweenness_centrality(G, weight='weight') av_nd = nx.average_neighbor_degree(G, weight='weight') ctr = nx.clustering(G, weight='weight') ranPaN = nx.pagerank_numpy(G, weight='weight') Gol_N = nx.hits_numpy(G) Dgc = nx.degree_centrality(G) cl_ce = nx.closeness_centrality(G) cluster_Sq = nx.square_clustering(G) centr = nx.core_number(G) cami = nx.node_clique_number(G) camiN = nx.number_of_cliques(G) trian = nx.triangles(G) colorG = nx.greedy_color(G) try: cenVNum = nx.eigenvector_centrality_numpy(G, weight='weight') tam = tam + 1 BoolCenV = True except TypeError: print( "La red es muy pequeña y no se puede calcular este parametro gil") except: print('NetworkXPointlessConcept: graph null') if Threshold > 0: carga_cen = nx.load_centrality(G, weight='weight') #Pesos positivos BoolLoad = True tam = tam + 1 #katxC=nx.katz_centrality(G, alpha=alpha, beta=beta, weight='weight') #cenV=nx.eigenvector_centrality(G,weight='weight') #cenV=nx.eigenvector_centrality(G,weight='weight') #Golp=nx.hits(G) #Gol_si=nx.hits_scipy(G) #ranPa=nx.pagerank(G, weight='weight') #ranPaS=nx.pagerank_scipy(G, weight='weight') matrix_datos = np.zeros((tam, np.shape(labelNew)[0])) tam = 15 print(np.shape(matrix_datos)) lim = np.shape(labelNew)[0] for i in range(lim): roi = labelNew[i] #print(roi) matrix_datos[0, i] = katxCN[roi] matrix_datos[1, i] = bcen[roi] matrix_datos[2, i] = av_nd[roi] matrix_datos[3, i] = ctr[roi] matrix_datos[4, i] = ranPaN[roi] matrix_datos[5, i] = Gol_N[0][roi] matrix_datos[6, i] = Gol_N[1][roi] matrix_datos[7, i] = Dgc[roi] matrix_datos[8, i] = cl_ce[roi] matrix_datos[9, i] = cluster_Sq[roi] matrix_datos[10, i] = centr[roi] matrix_datos[11, i] = cami[roi] matrix_datos[12, i] = camiN[roi] matrix_datos[13, i] = trian[roi] matrix_datos[14, i] = colorG[roi] if BoolCenV: matrix_datos[15, i] = cenVNum[roi] tam = tam + 1 if BoolLoad: matrix_datos[16, i] = carga_cen[roi] tam = tam + 1 #matrix_datos[0,i]=katxC[roi] #matrix_datos[2,i]=cenV[roi] #matrix_datos[7,i]=Golp[0][roi] #matrix_datos[9,i]=Gol_si[0][roi] #matrix_datos[10,i]=Golp[1][roi] 
#matrix_datos[12,i]=Gol_si[1][roi] #matrix_datos[22,i]=ranPa[roi] #matrix_datos[24,i]=ranPaS[roi] FuncName = [ 'degree_pearson_correlation_coefficient', 'average_clustering', 'estrada_index', 'transitivity', 'average_node_connectivity', 'degree_assortativity_coefficient', 'katz_centrality_numpy', 'betweenness_centrality', 'average_neighbor_degree', 'clustering', 'pagerank_numpy', 'hits_numpy0', 'hits_numpy1', 'degree_centrality', 'closeness_centrality', 'square_clustering', 'core_number', 'node_clique_number', 'number_of_cliques', 'triangles', 'greedy_color', 'eigenvector_centrality_numpy', 'load_centrality' ] frame = pd.DataFrame(matrix_datos) frame.columns = labelNew frame.index = FuncName[6:tam] Resul = os.getcwd() out_data = Resul + '/graph_metrics.csv' out_mat = Resul + '/graph_metrics_global.mat' frame.to_csv(out_data) sio.savemat( out_mat, { FuncName[0]: Dpc, FuncName[1]: cluster, FuncName[2]: estra, FuncName[3]: tnsity, FuncName[4]: conNo, FuncName[5]: ac }) return out_data, out_mat
def community_core(self, n): if self.triangles is None: self.triangles = sum(nx.triangles(self.G).values()) / 3 return nx.triangles(self.G, n) / self.triangles
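A hedged usage sketch: the surrounding class is not shown, so this assumes the function can be called directly with any object exposing the same G and triangles attributes. It scores a node by its share of all triangles in the graph.

import networkx as nx
from types import SimpleNamespace

# Stand-in for the missing class instance (an assumption, not the original design).
self_like = SimpleNamespace(G=nx.complete_graph(5), triangles=None)
print(community_core(self_like, 0))   # node 0 sits in 6 of the 10 triangles of K5 -> 0.6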
def triang(graph): """triang""" return np.asarray(list(nx.triangles(graph).values())).mean()
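A quick usage sketch (the example graph is assumed, and the helper above needs numpy imported as np): in K4 every vertex lies in comb(3, 2) = 3 triangles, so the per-node mean is 3.0.

import numpy as np
import networkx as nx

print(triang(nx.complete_graph(4)))   # mean triangles per node of K4 -> 3.0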
def CLNB(G, method):
    node_num = nx.number_of_nodes(G)
    edge_num = nx.number_of_edges(G)
    alpha = math.log2(edge_num / (node_num * (node_num - 1) / 2))
    degree_pair = {}
    M = node_num * (node_num - 1) / 2
    s = M / edge_num - 1
    logs = math.log2(s)
    edge = nx.edges(G)
    degree_list = [nx.degree(G, v) for v in range(G.number_of_nodes())]
    # Compute the "role" of each vertex
    role_list = [nx.triangles(G, w) for w in range(node_num)]  # number of triangles
    for w in G:
        triangle = role_list[w]
        numerator = triangle + 1
        d = degree_list[w]
        non_triangle = d * (d - 1) / 2 - triangle
        denominator = non_triangle + 1
        role_list[w] = numerator / denominator
    # end for
    # Count the distinct degree values in the graph
    distinct_degree_list = list(set(degree_list))
    size = len(distinct_degree_list)
    for i in range(size):
        for j in range(i, size):
            (di, dj) = pair(distinct_degree_list[i], distinct_degree_list[j])
            degree_pair[di, dj] = 0
    for u, v in edge:
        d1 = nx.degree(G, u)
        d2 = nx.degree(G, v)
        d1, d2 = pair(d1, d2)
        degree_pair[d1, d2] = degree_pair[d1, d2] + 1
    # Compute the mutual information of each degree-pair connection
    self_Connect_dict = {}
    for x in range(size):
        k_x = distinct_degree_list[x]
        for y in range(x, size):
            k_y = distinct_degree_list[y]
            (k_n, k_m) = pair(k_x, k_y)
            if (degree_pair[k_n, k_m] == 0):
                self_Connect_dict[k_n, k_m] = alpha
                self_Connect_dict[k_m, k_n] = alpha
            else:
                self_Connect_dict[k_n, k_m] = math.log2(degree_pair[k_n, k_m] / edge_num)
                self_Connect_dict[k_m, k_n] = math.log2(degree_pair[k_n, k_m] / edge_num)
    # Count the edges among the common neighbors of each unconnected node pair
    ebunch = nx.non_edges(G)
    neighbor_dict = {}
    for m, n in ebunch:
        # materialize the iterator so it can be traversed twice in the nested loops
        com_nei = list(nx.common_neighbors(G, m, n))
        i = 0
        for x in com_nei:
            for y in com_nei:
                if (m != n) & (G.has_edge(x, y)):
                    i = i + 1
        neighbor_dict[m, n] = i
    sim_dict = {}  # dictionary holding the similarity scores
    # Compute the similarities
    min_value = M
    ebunch = nx.non_edges(G)
    for u, v in ebunch:
        s = 0
        for w in nx.common_neighbors(G, u, v):
            if method == 'CN':
                s += logs + math.log2(role_list[w])
            elif method == 'AA':
                s += 1 / math.log2(degree_list[w]) * (logs + math.log2(role_list[w]))
            else:  # RA
                s += 1 / degree_list[w] * (logs + math.log2(role_list[w]))
            # end if
        # end for
        if s != 0:
            sim_dict[(u, v)] = s * (1 + neighbor_dict[u, v]) + self_Connect_dict[
                degree_list[u], degree_list[v]]
            min_value = min(s, min_value)
        # end if
    # end for
    if min_value < 0:
        min_value *= -1
        for k in sim_dict.keys():
            sim_dict[k] += min_value
        # end for
    # end if
    return sim_dict
def test_empty(self): G = nx.Graph() assert list(nx.triangles(G).values()) == []
import networkx as nx import numpy as np n = 100 k = 4 p = .3 tries = 1000 recover_p = [] numtris = [] tm = [] tstd = [] g1 = nx.connected_watts_strogatz_graph(n, k, 0, tries) tri = float(sum(nx.triangles(g1).values()) / 3) # tri = (k-3)*n # when n is large relative to k for p in range(0, 11, 1): p = p / 10.0 for i in range(1000): g = nx.connected_watts_strogatz_graph(n, k, p, tries) numtri = float(sum(nx.triangles(g).values()) / 3) # number of triangles numtris.append(numtri) numtri = numtri - (n * k) * ( float(k) / (n - k) ) # adjust for k/(n-k) prob of rewiring to make new triangle by chance newp = (numtri / tri) # recovered w-s p recover_p.append(newp) tstd.append(np.std(numtris)) tm.append(np.mean(numtris))
def makeMeasures(self, network, exclude): """Make the network measures""" # fazer condicional para cada medida, se não estiver na exclude[], # fazer medida de tempo e guardar como tupla no g = network.g gu = network.gu timings = [] T = t.time() self.N = network.g.number_of_nodes() self.E = network.g.number_of_edges() self.E_ = network.gu.number_of_edges() self.edges = g.edges(data=True) self.nodes = g.nodes(data=True) timings.append((t.time() - T, "edges and nodes")) T = t.time() self.degrees = dict(g.degree()) self.nodes_ = sorted(g.nodes(), key=lambda x: self.degrees[x]) self.degrees_ = [self.degrees[i] for i in self.nodes_] self.in_degrees = dict(g.in_degree()) self.in_degrees_ = [self.in_degrees[i] for i in self.nodes_] self.out_degrees = dict(g.out_degree()) self.out_degrees_ = [self.out_degrees[i] for i in self.nodes_] timings.append((t.time() - T, "in_out_total_degrees")) T = t.time() self.strengths = dict(g.degree(weight="weight")) self.nodes__ = sorted(g.nodes(), key=lambda x: self.strengths[x]) self.strengths_ = [self.strengths[i] for i in self.nodes_] self.in_strengths = dict(g.in_degree(weight="weight")) self.in_strengths_ = [self.in_strengths[i] for i in self.nodes_] self.out_strengths = dict(g.out_degree(weight="weight")) self.out_strengths_ = [self.out_strengths[i] for i in self.nodes_] timings.append((t.time() - T, "in_out_total_strengths")) # symmetry measures self.asymmetries = asymmetries = [] self.disequilibrium = disequilibriums = [] self.asymmetries_edge_mean = asymmetries_edge_mean = [] self.asymmetries_edge_std = asymmetries_edge_std = [] self.disequilibrium_edge_mean = disequilibrium_edge_mean = [] self.disequilibrium_edge_std = disequilibrium_edge_std = [] for node in self.nodes_: if not self.degrees[node]: asymmetries.append(0.) disequilibriums.append(0.) asymmetries_edge_mean.append(0.) asymmetries_edge_std.append(0.) disequilibrium_edge_mean.append(0.) disequilibrium_edge_std.append(0.) else: asymmetries.append( (self.in_degrees[node] - self.out_degrees[node]) / self.degrees[node]) disequilibriums.append( (self.in_strengths[node] - self.out_strengths[node]) / self.strengths[node]) edge_asymmetries = ea = [] edge_disequilibriums = ed = [] predecessors = g.predecessors(node) successors = g.successors(node) for pred in predecessors: if pred in successors: ea.append(0.) ed.append( (g[pred][node]['weight'] - g[node][pred]['weight']) / self.strengths[node]) else: ea.append(1.) ed.append(g[pred][node]['weight'] / self.strengths[node]) for suc in successors: if suc in predecessors: pass else: ea.append(-1.) 
ed.append(-g[node][suc]['weight'] / self.strengths[node]) asymmetries_edge_mean.append(n.mean(ea)) asymmetries_edge_std.append(n.std(ea)) disequilibrium_edge_mean.append(n.mean(ed)) disequilibrium_edge_std.append(n.std(ed)) if "weighted_directed_betweenness" not in exclude: T = t.time() self.weighted_directed_betweenness = x.betweenness_centrality( g, weight="weight") self.weighted_directed_betweenness_ = [ self.weighted_directed_betweenness[i] for i in self.nodes_ ] timings.append((t.time() - T, "weighted_directed_betweenness")) if "unweighted_directed_betweenness" not in exclude: T = t.time() self.unweighted_directed_betweenness = x.betweenness_centrality(g) timings.append((t.time() - T, "unweighted_directed_betweenness")) if "weighted_undirected_betweenness" not in exclude: T = t.time() self.weighted_undirected_betweenness = x.betweenness_centrality( gu, weight="weight") timings.append((t.time() - T, "weighted_undirected_betweenness")) if "unweighted_undirected_betweenness" not in exclude: T = t.time() self.weighted_undirected_betweenness = x.betweenness_centrality(gu) timings.append((t.time() - T, "unweighted_undirected_betweenness")) if "wiener" not in exclude: T = t.time() self.wiener = x.wiener_index(g, weight="weight") timings.append((t.time() - T, "weiner")) if "closeness" not in exclude: T = t.time() self.closeness = x.vitality.closeness_vitality(g, weight="weight") timings.append((t.time() - T, "closeness")) if "transitivity" not in exclude: T = t.time() self.transitivity = x.transitivity(g) timings.append((t.time() - T, "transitivity")) if "rich_club" not in exclude: T = t.time() self.rich_club = x.rich_club_coefficient(gu) timings.append((t.time() - T, "rich_club")) if "weighted_clustering" not in exclude: T = t.time() self.weighted_clusterings = x.clustering(network.gu, weight="weight") self.weighted_clusterings_ = [ self.weighted_clusterings[i] for i in self.nodes_ ] timings.append((t.time() - T, "weighted_clustering")) if "clustering" not in exclude: T = t.time() self.clusterings = x.clustering(network.gu) self.clusterings_ = [self.clusterings[i] for i in self.clusterings] timings.append((t.time() - T, "clustering")) if "triangles" not in exclude: T = t.time() self.triangles = x.triangles(gu) timings.append((t.time() - T, "clustering")) if "n_weakly_connected_components" not in exclude: T = t.time() self.n_weakly_connected_components = x.number_weakly_connected_components( g) timings.append((t.time() - T, "n_weakly_connected_components")) if "n_strongly_connected_components" not in exclude: T = t.time() self.n_strongly_connected_components = x.number_strongly_connected_components( g) timings.append((t.time() - T, "n_strongly_connected_components")) T = t.time() foo = [i for i in x.connected_component_subgraphs(gu)] bar = sorted(foo, key=lambda x: x.number_of_nodes(), reverse=True) self.component = c = bar[0] timings.append((t.time() - T, "component")) T = t.time() self.diameter = x.diameter(c) self.radius = x.radius(c) self.center = x.center(c) self.periphery = x.periphery(c) timings.append((t.time() - T, "radius_diameter_center_periphery")) self.timings = timings T = t.time() self.n_connected_components = x.number_connected_components(gu) nodes = [] nodes_components = [ foo.nodes() for foo in x.connected_component_subgraphs(gu) ][:1] for nodes_ in nodes_components: nodes += nodes_ self.periphery_ = nodes self.timings = timings
def describe_graph(graph): # number of nodes n_nodes = graph.number_of_nodes() print(format('Number of nodes:', '35s'), n_nodes) # number of edges n_edges = graph.number_of_edges() print(format('Number of edges:', '35s'), n_edges) # density density = nx.density(graph) print(format('Density:', '35s'), density) # assortativity assortativity = nx.degree_assortativity_coefficient(graph) print(format('Assortativity:', '35s'), assortativity) # network transitivity transitivity = nx.transitivity(graph) print(format('Transitivity:', '35s'), transitivity) # average clustering index average_clustering = nx.average_clustering(graph) print(format('Average clustering:', '35s'), average_clustering) # average shortest path length average_shortest_path_length = nx.average_shortest_path_length(graph) print(format('Average shortest path length:', '35s'), average_shortest_path_length) # degree centrality centrality_degree = np.array( [c for n, c in nx.degree_centrality(graph).items()]) print(format('Max degree centrality:', '35s'), centrality_degree.max()) print(format('Min degree centrality:', '35s'), centrality_degree.min()) print(format('Average degree centrality:', '35s'), centrality_degree.mean()) # eigenvector centrality centrality_eigenvector = np.array( [c for n, c in nx.eigenvector_centrality(graph).items()]) print(format('Max eigenvector centrality:', '35s'), centrality_eigenvector.max()) print(format('Min eigenvector centrality:', '35s'), centrality_eigenvector.min()) print(format('Average eigenvector centrality:', '35s'), centrality_eigenvector.mean()) # betweenness centrality centrality_betweenness = np.array( [c for n, c in nx.betweenness_centrality(graph).items()]) print(format('Max betweenness centrality:', '35s'), centrality_betweenness.max()) print(format('Min betweenness centrality:', '35s'), centrality_betweenness.min()) print(format('Average betweenness centrality:', '35s'), centrality_betweenness.mean()) # degree degree_array = np.array([d for n, d in graph.degree()]) print(format('Max degree:', '35s'), degree_array.max()) print(format('Min degree:', '35s'), degree_array.min()) print(format('Average degree:', '35s'), degree_array.mean()) # triangle triangle_array = np.array([t for n, t in nx.triangles(graph).items()]) print(format('Number of triangles:', '35s'), triangle_array.sum() / 3) print(format('Max number of triangles:', '35s'), triangle_array.max()) print(format('Average number of triangles:', '35s'), triangle_array.mean()) # core kcore_array = np.array([c for n, c in nx.core_number(graph).items()]) print(format('Max k-core number:', '35s'), kcore_array.max()) description = { 'n_nodes': n_nodes, 'n_edges': n_edges, 'density': density, 'assortativity': assortativity, 'transitivity': transitivity, 'average_clustering': average_clustering, 'average_shortest_path_length': average_shortest_path_length, 'centrality_degree': centrality_degree, 'centrality_eigenvector': centrality_eigenvector, 'centrality_betweenness': centrality_betweenness, 'df': pd.DataFrame({ 'degree': degree_array, 'triangle': triangle_array, 'kcore': kcore_array, 'centrality_degree': centrality_degree, 'centrality_eigenvector': centrality_eigenvector, 'centrality_betweenness': centrality_betweenness }) } return description
# Zero out the main diagonal because we are # interested only in indices (i,j) with i != j np.fill_diagonal(A, 0) np.fill_diagonal(B, 0) # Compute the local sensitivity at distance s for 0 <= s <= n lsd = local_sensitivity_dist(A, B, n) # Compute the smooth sensitivity epsilon = 1.0 smooth_scaling = np.exp(-epsilon * np.arange(n + 1)) smooth_sensitivity = np.max(lsd * smooth_scaling) # ----------------------------------------------------------------------------- # Compute the exact triangle count triangle_count = np.array([sum(nx.triangles(g).values()) / 3.0]) # Create a differentially private release mechanism mechanism = CauchyMechanism(epsilon=epsilon, beta=beta) # Compute the differentially private query response dp_triangle_count = mechanism.release(triangle_count, smooth_sensitivity) print("Exact triangle count = %i" % int(triangle_count)) print("Differentially private triangle count = %f" % dp_triangle_count) # ============================================================================= # Minimum Spanning Tree Cost # ============================================================================= def lightweight_graph(g, w):
a_t = nth(A, 3).diagonal().sum() / 6 n = len(A[:, 0]) p_t = binom(n, 3) # ============================================================================= # Ex. 13.1.4: Apply the function fraction_triangles to el_agg and print the # triangle fraction in the network. Next remove all edges that go between # classes. Compute triangle fraction within each class and store it. Compute # the mean within class triangles and bootstrap the standard error of the mean. # Comment on the output # ============================================================================= F = nx.from_pandas_edgelist(el_agg, 'u1', 'u2', edge_attr=True) t = nx.triangles(F) T = sum(t.values()) / 3 print("Number of triangles: ", T) ft = fraction_triangles(el_agg, ind.u) print("fraction of triangles: ", ft) AC = nx.average_clustering(F) print("clustering cofficient: ", AC) el_agg_class = el_agg[el_agg['class1'] == el_agg['class2']] tjeck = el_agg[el_agg['class1'] != el_agg['class2']] counter = pd.DataFrame(ind.groupby(['class']).u.count())
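The a_t expression at the top of the fragment above (assuming nth(A, 3) computes the third power of the adjacency matrix A) uses the standard identity that the number of triangles equals trace(A^3) / 6. A small self-contained check of that identity against nx.triangles, on an arbitrary example graph:

import networkx as nx
import numpy as np

G = nx.karate_club_graph()
A = nx.to_numpy_array(G)
trace_count = np.trace(np.linalg.matrix_power(A, 3)) / 6   # trace(A^3) / 6
nx_count = sum(nx.triangles(G).values()) / 3               # each triangle seen at 3 vertices
assert round(trace_count) == round(nx_count)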
if needs_eig: print "[+] Computing eigenvector centrality..." eig = pd.Series(nx.eigenvector_centrality_numpy(graph), name='eigenvector_centrality').reset_index() eig.columns = ['node_name', 'eigenvector_centrality'] if needs_clu: print "[+] Computing clustering coefficient..." clu = pd.Series(nx.clustering(graph), name='clustering_coefficient').reset_index() clu.columns = ['node_name', 'clustering_coefficient'] if needs_tri: print "[+] Computing number of triangles..." tri = pd.Series(nx.triangles(graph), name='triangles').reset_index() tri.columns = ['node_name', 'triangles'] if needs_clo: print "[+] Computing closeness centrality..." clo = pd.Series(nx.closeness_centrality(graph), name='closeness_centrality').reset_index() clo.columns = ['node_name', 'closeness_centrality'] if needs_pag: print "[+] Computing pagerank..." pag = pd.Series(nx.pagerank(graph), name='pagerank').reset_index() pag.columns = ['node_name', 'pagerank'] if needs_squ: print "[+] Computing square clustering..."
def get_triangles(Graph): return nx.triangles(Graph)