def detectBetweenness(G, numClusters, sites, bipartite):
    Gnew = copy.deepcopy(G)
    numComponents = nx.number_connected_components(Gnew)

    betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')
    pickle.dump(betweenness, open("betweennessUnipartite.p", "wb"))
    #betweenness = pickle.load(open("betweennessUnipartite.p", "rb"))

    while numComponents < numClusters:
        print("num components is now", numComponents)  ### REMEMBER TO DELETE THIS ###

        # Recalculate the betweenness of each edge on the working copy
        betweenness = nx.edge_betweenness_centrality(Gnew, weight='capacity')

        # Identify and remove the edge(s) with the highest betweenness
        max_ = max(betweenness.values())
        for k, v in betweenness.items():
            if float(v) == max_:
                Gnew.remove_edge(k[0], k[1])
        numComponents = nx.number_connected_components(Gnew)

    clusters = {}
    i = 0
    j = 0
    for component in nx.connected_components(Gnew):
        for node in component:
            if node in sites:
                clusters[node] = i
                j += 1
        print(j, "Nodes in cluster", i)
        j = 0
        i += 1

    return clusters
def find_groups_girvan_newman(self, num_groups):
    if num_groups == 1:
        return set([self.G])
    elif num_groups in self.groupCache:
        # Return a copy of the stored set
        return self.groupCache[num_groups].copy()
    elif num_groups > len(self.G.nodes()):
        return self.find_groups(len(self.G.nodes()))

    # Returns set of subgraphs
    previous_partition = self.find_groups(num_groups - 1)

    # Map subgraph to betweenness dict (a dict mapping edges to betweenness)
    betweenness_map = {subgraph: nx.edge_betweenness_centrality(subgraph)
                       for subgraph in previous_partition}

    # Map subgraph to the (edge, betweenness) pair of the max betweenness in that subgraph
    betweenness_max_map = {e[0]: max(e[1].items(), key=lambda x: x[1])
                           for e in betweenness_map.items() if len(e[0].nodes()) > 1}

    # Track removed edges to add them again at end of algorithm
    removed_edges = []

    # Loop until a subgraph is split
    while True:
        print("Removing edge")

        # Identify the subgraph and edge with max betweenness
        target_subgraph_edge = max(betweenness_max_map.items(), key=lambda x: x[1][1])
        target_subgraph = target_subgraph_edge[0]
        target_edge = target_subgraph_edge[1][0]

        # Remove the edge (temporarily)
        target_subgraph.remove_edge(target_edge[0], target_edge[1])
        removed_edges.append(target_edge)

        connected_components = list(nx.connected_components(target_subgraph))
        if len(connected_components) > 1:
            # Removing one edge from a connected component yields at most 2 components
            new_subgraph_1 = target_subgraph.subgraph(connected_components[0]).copy()
            new_subgraph_2 = target_subgraph.subgraph(connected_components[1]).copy()

            # Repair removed edges in target_subgraph
            target_subgraph.add_edges_from(removed_edges)

            # Replace the target subgraph with the two new subgraphs
            previous_partition.discard(target_subgraph)
            previous_partition.add(new_subgraph_1)
            previous_partition.add(new_subgraph_2)

            # Store result
            self.groupCache[num_groups] = previous_partition
            return previous_partition.copy()
        else:
            # Recalculate betweenness and max betweenness for the target subgraph
            target_betweenness = nx.edge_betweenness_centrality(target_subgraph)
            betweenness_map[target_subgraph] = target_betweenness
            betweenness_max_map[target_subgraph] = max(target_betweenness.items(),
                                                       key=lambda x: x[1])
def run_n(self, n):
    # Until there is no edge in the graph
    while len(self.G.edges()) != 0:
        # Find the edge with the highest betweenness
        edge = max(nx.edge_betweenness_centrality(self.G).items(),
                   key=lambda item: item[1])[0]
        # Remove that edge
        self.G.remove_edge(edge[0], edge[1])
        # Get the connected components
        components = [list(c) for c in nx.connected_components(self.G)]
        # Divide the graph into n parts.
        if len(components) <= n:
            # Compute Q
            currentQ = self.calculateQ(components, self.G_copy)
            if currentQ not in self.all_Q:
                self.all_Q.append(currentQ)
            if currentQ > self.max_Q:
                self.max_Q = currentQ
                self.partition = components

    print('The number of communities:', len(self.partition))
    print('Max_Q:', self.max_Q)
    print(self.partition)
    return self.partition, self.all_Q, self.max_Q
def plot_edge_btwn(G, bins=20):
    """
    Plot the edge-betweenness distributions.

    Args:
        G: networkx graph object
    Returns:
        figure handle & axes array.
    """
    # Get edge-betweenness dictionary
    edge_btwn_dict = nx.edge_betweenness_centrality(G)

    # Sort edge-betweenness dictionary by edge-betweenness values
    edge_btwn_labels_sorted, edge_btwn_vec_sorted = \
        network_compute.get_ranked(edge_btwn_dict)

    # Open figure & axes
    fig, axs = plt.subplots(2, 1)

    # Plot histogram
    axs[0].hist(edge_btwn_vec_sorted, bins)
    axs[0].set_ylabel('Occurrences')
    axs[0].set_xlabel('Edge-betweenness')

    # Plot sorted edge-betweenness values
    axs[1].scatter(np.arange(len(edge_btwn_vec_sorted)),
                   edge_btwn_vec_sorted, s=20, c='r')
    axs[1].set_xlabel('Area')
    axs[1].set_ylabel('Edge-betweenness')

    return fig, axs
def f28(self):
    start = 0
    c_vals = nx.edge_betweenness_centrality(self.G).values()
    res = sum(c_vals)
    stop = 0
    # self.feature_time.append(stop - start)
    return res
def test_C4(self):
    """Edge betweenness centrality: C4"""
    G = nx.cycle_graph(4)
    b = nx.edge_betweenness_centrality(G, weight=None, normalized=True)
    b_answer = {(0, 1): 2, (0, 3): 2, (1, 2): 2, (2, 3): 2}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n] / 6.0)
def run(self):
    while len(self.G.edges()) != 0:
        edges = {}
        edges_betweenness_centrality = nx.edge_betweenness_centrality(self.G)
        # Scale each edge's betweenness by the inverse of its weight
        for e, ebc in edges_betweenness_centrality.items():
            edge_weight = ebc / self.G.get_edge_data(e[0], e[1])['weight']
            edges[e] = edge_weight
        edge = max(edges.items(), key=lambda item: item[1])[0]
        self.G.remove_edge(edge[0], edge[1])
        components = [list(c) for c in nx.connected_components(self.G)]
        if len(components) != len(self.partition):
            # Compute Q
            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                self.all_Q.append(cur_Q)
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

    print('-----------the divided communities and the Max Q------------')
    print('The number of Communities:', len(self.partition))
    print('Max_Q:', self.max_Q)
    print(self.partition)
    return self.partition, self.all_Q, self.max_Q
def divisive_approach(graph):
    """
    A modularity-based algorithm that deletes the weakest links in the graph;
    edge betweenness serves as the (negatively related) strength score of a link.
    :param graph: a nx.Graph
    :return: labels of each node
    """
    time_start = time.time()
    print("Calculating communities with DA...")
    g = clone_graph(graph)
    partitions = [[n for n in g.nodes()]]
    labels = list(g.nodes())

    max_q = 0.0
    while len(g.edges()) > 0:
        edge = max(nx.edge_betweenness_centrality(g).items(),
                   key=lambda item: item[1])[0]
        g.remove_edge(edge[0], edge[1])
        components = [list(c) for c in nx.connected_components(g)]
        if len(components) != len(partitions):
            q = cal_Q(components, graph)
            if q > max_q:
                max_q = q
                partitions = components

    for i in range(len(partitions)):
        for node in partitions[i]:
            labels[node] = i

    time_end = time.time()
    print("Calculation time:", time_end - time_start, "seconds")
    return labels
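# Usage sketch (an illustrative addition, not part of the original module):
# assumes the clone_graph and cal_Q helpers referenced above are defined
# alongside divisive_approach. Zachary's karate club graph has integer nodes
# 0..33, which matches the list-based label indexing used above.
import networkx as nx

labels = divisive_approach(nx.karate_club_graph())
print(labels)  # one community id per node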
def run(self):
    # Until there is no edge in the graph
    while len(self.G.edges()) != 0:
        # Find the edge with the highest betweenness
        edge = max(nx.edge_betweenness_centrality(self.G).items(),
                   key=lambda item: item[1])[0]
        # Remove that edge
        self.G.remove_edge(edge[0], edge[1])
        # List the connected components
        components = [list(c) for c in nx.connected_components(self.G)]
        if len(components) != len(self.partition):
            # Compute Q
            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                self.all_Q.append(cur_Q)
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

    print('-----------the Max Q and divided communities-----------')
    print('The number of Communities:', len(self.partition))
    print("Communities:", self.partition)
    print('Max_Q:', self.max_Q)
    return self.partition, self.all_Q, self.max_Q
def test_K5(self):
    """Edge betweenness centrality: K5"""
    G = nx.complete_graph(5)
    b = nx.edge_betweenness_centrality(G, weight='weight', normalized=False)
    b_answer = dict.fromkeys(G.edges(), 1)
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n])
def cluster_edge_betweenness(iterations, G):
    for i in range(iterations):
        print('Iteration ', i + 1, ' of ', iterations)
        # Approximate betweenness from a sample of k=10 source nodes
        eb = nx.edge_betweenness_centrality(G, k=10)
        max_eb = max(eb, key=eb.get)
        G.remove_edge(max_eb[0], max_eb[1])
    return G
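# Usage sketch (illustrative, not from the source): each iteration removes one
# edge chosen by the k=10-sampled, approximate betweenness, so results can
# vary between runs; pass a copy if the original graph must survive.
import networkx as nx

G = cluster_edge_betweenness(5, nx.karate_club_graph().copy())
print(nx.number_connected_components(G))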
def run(self):
    while len(self.G.edges()) != 0:
        # nx.edge_betweenness_centrality returns a structure like {('C', 'F'): 0.4}
        # Compute the betweenness of every edge and find the edge with the largest value
        edge = max(nx.edge_betweenness_centrality(self.G).items(),
                   key=lambda item: item[1])[0]
        # Remove the edge with the largest betweenness
        self.G.remove_edge(edge[0], edge[1])
        # List the connected components
        components = [list(c) for c in nx.connected_components(self.G)]
        if len(components) != len(self.partition):
            # Compute Q
            # nx.algorithms.community.modularity(self.G_copy, components) would call
            # the networkx library function directly; it is equivalent to cal_Q()
            cur_Q = self.cal_Q(components, self.G_copy)
            if cur_Q not in self.all_Q:
                self.all_Q.append(cur_Q)
                # A map from cur_Q to components could also be kept at this step
            if cur_Q > self.max_Q:
                self.max_Q = cur_Q
                self.partition = components

    print('-----------the Max Q and divided communities-----------')
    print('The number of Communities:', len(self.partition))
    print("Communities:", self.partition)
    print('Max_Q:', self.max_Q)
    return self.partition, self.all_Q, self.max_Q
def test_P4(self):
    """Edge betweenness centrality: P4"""
    G = nx.path_graph(4)
    b = nx.edge_betweenness_centrality(G, weight='weight', normalized=False)
    b_answer = {(0, 1): 3, (1, 2): 4, (2, 3): 3}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n])
def CalculateBetweeness(graph):
    BetweenValue = nx.edge_betweenness_centrality(graph, normalized=True,
                                                  k=None, weight=None, seed=None)
    # Remove every edge whose betweenness equals the maximum
    graph.remove_edges_from([k for k, v in BetweenValue.items()
                             if v == max(BetweenValue.values())])
    return graph
def test_normalized_P4(self):
    """Edge betweenness centrality: P4"""
    G = nx.path_graph(4)
    b = nx.edge_betweenness_centrality(G, weight=None, normalized=True)
    b_answer = {(0, 1): 3, (1, 2): 4, (2, 3): 3}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n] / 6.0)
def file_to_dot(infile):
    interactions = defaultdict(lambda: 0)
    es = list(edges(infile))
    for a, b, users in es:
        interactions[a, b] = users
    keys, ratio = compute_ratios(interactions, lambda k: interactions[k, k] > 5)

    G = nx.Graph()
    for a, b, users in es:
        if a > b and a in keys and b in keys and users > 0 and ratio[a, b] > 0:
            rat = (ratio[a, b] + ratio[b, a]) / 2
            G.add_edge(a, b, ratio=rat, users=users, connection=rat ** -1)

    btwn = nx.edge_betweenness_centrality(G, weight='connection')

    GG = nx.Graph()
    GG.add_edges_from([(a, b, {'weight': val}) for (a, b), val in btwn.items()])
    for node in GG.nodes():
        GG.nodes[node]['height'] = GG.nodes[node]['width'] = size(G, node)
        GG.nodes[node]['color'] = color(G, node)

    Gtree = nx.minimum_spanning_tree(GG)
    Gtree_dot = nx.nx_pydot.to_pydot(Gtree)
    return Gtree_dot.to_string()
def list_edge_betweenness(G):
    edge = {}
    edges_list = list(G.edges())
    edge_betweenness = nx.edge_betweenness_centrality(G, normalized=True,
                                                      weight='weight')
    for i in edge_betweenness.keys():
        edge[i] = edge_betweenness[i]
    return edge
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    # NOTE: The networkx API does not allow passing a list of vertices,
    # and the sampling operates on Gnx.nodes() directly.
    # We first mimic the acquisition of the nodes to compare with the same sources.
    random.seed(seed)  # It will be called again in nx's call
    sources = random.sample(Gnx.nodes(), k)

    # NOTE: Since we sampled the networkx graph, the sources are already
    # external ids, so we don't need to translate to external ids for cugraph
    df = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        result_dtype=result_dtype,
    )

    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, seed=seed
    )

    nx_df = generate_nx_result(nx_bc_dict, type(Gnx) is nx.DiGraph).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    merged_df = df.merge(nx_df, on=['src', 'dst']).rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)

    return merged_df
def get_communities(graph):
    betweenness = nx.edge_betweenness_centrality(graph)
    sorted_betweeness = [x[0] for x in sorted(betweenness.items(),
                                              key=lambda x: x[1], reverse=True)]
    best_partitions = []
    max_modularity = -1.0
    graph_copy = graph.copy()
    while sorted_betweeness:
        communities = [list(x) for x in nx.connected_components(graph_copy)]
        partitions = {}
        for i in range(len(communities)):
            for node in communities[i]:
                partitions[node] = i
        modularity = community.modularity(partitions, graph_copy)
        if modularity > max_modularity:
            best_partitions = communities
            max_modularity = modularity
        elif modularity <= max_modularity:
            break
        graph_copy.remove_edge(*sorted_betweeness[0])
        del sorted_betweeness[0]

    for partition in best_partitions:
        print(sorted(partition))

    val_map = {}
    for partition in best_partitions:
        value = random.random()
        while value in val_map.values():
            value = random.random()
        for node in partition:
            val_map[node] = value
    values = [val_map.get(node) for node in graph.nodes()]
    nx.draw_spring(graph, node_color=values, node_size=500, with_labels=True)
    plt.savefig(sys.argv[2])
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    )
    assert (
        df["betweenness_centrality"].dtype == result_dtype
    ), "'betweenness_centrality' column does not have the expected type"

    nx_bc_dict = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )

    nx_df = generate_nx_result(nx_bc_dict, type(Gnx) is nx.DiGraph).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    merged_df = df.merge(nx_df, on=['src', 'dst']).rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    ).reset_index(drop=True)

    return merged_df
def girvan_newman_algorithm(G, weight):
    """
    G is the original network; g is the working copy from which edges are
    removed one at a time.
    """
    g = G.copy()

    """ initial """
    step = 0                # current step
    log_step = []           # log of steps
    log_modularity = []     # log of modularity values
    old_max_m = 0           # best modularity seen so far
    # k holds the connected components (communities), largest first
    k = sorted(nx.connected_components(G), key=len, reverse=True)
    m = community.modularity(G, communities=k, weight=weight)  # modularity
    max_step = 0            # step at which modularity peaked

    """ Girvan-Newman algorithm """
    while len(g.edges()) > 0:
        # Extract communities and compute their modularity on the original graph
        k = sorted(nx.connected_components(g), key=len, reverse=True)
        m = community.modularity(G, communities=k, weight=weight)

        if m > old_max_m:
            # Record whenever the current modularity beats the previous maximum
            max_step = step
            old_max_m = m

        log_step = log_step + [step]            # for logging
        log_modularity = log_modularity + [m]   # for logging
        print("step: ", step, " modularity: ", m)

        """ remove edge """
        step = step + 1
        # Compute betweenness centrality and remove the edge with the largest value
        betweenness = nx.edge_betweenness_centrality(g, weight=weight)
        max_edge = max(betweenness, key=betweenness.get)
        g.remove_edge(max_edge[0], max_edge[1])

    return log_step, log_modularity, max_step
def test_edge_betweenness_centrality_nx(graph_file, directed, edgevals):
    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)
    assert nx.is_directed(Gnx) == directed

    nx_bc = nx.edge_betweenness_centrality(Gnx)
    cu_bc = cugraph.edge_betweenness_centrality(Gnx)

    # Calculating mismatch
    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
    err = 0
    assert len(networkx_bc) == len(cugraph_bc)
    for i in range(len(cugraph_bc)):
        if (
            abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01
            and cugraph_bc[i][0] == networkx_bc[i][0]
        ):
            err = err + 1
            print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}")
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_bc))
def test_K5(self):
    """Edge betweenness centrality: K5"""
    G = nx.complete_graph(5)
    b = nx.edge_betweenness_centrality(G, weight=None, normalized=False)
    b_answer = dict.fromkeys(G.edges(), 1)
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n])
def EBGirvanNewman(G):
    print("Inside EBGirvanNewman")
    initcomp = nx.number_connected_components(G)
    ncomp = initcomp
    print("Initial ncomp, ", ncomp)
    while ncomp <= initcomp:
        bw = nx.edge_betweenness_centrality(G, weight='weight')
        # Find the maximum centrality value
        max_ = max(bw.values())
        print(".....................", max_)
        # Remove every edge with the highest centrality (there may be more than one)
        for k, v in bw.items():
            if float(v) == max_:
                G.remove_edge(k[0], k[1])
                global k1
                k1 = k[1]
                global k0
                k0 = k[0]
        # Recalculate the number of components
        ncomp = nx.number_connected_components(G)
        print("No. of components", ncomp)
    print("Over............")
def gRa(g, w):
    '''w is the number of edges that remain in the data after the
    edge-removal perturbation p.'''
    tg = g.copy()
    Rq = nx.to_scipy_sparse_matrix(g)
    Rq = Rq.toarray()
    bw = nx.edge_betweenness_centrality(g, normalized=False)
    norm = sum(bw.values())
    e_num = len(g.edges())
    n = len(g)
    N = (n * (n - 1)) / 2
    for k, v in bw.items():
        g.add_edge(*k, weight=v)
    # print(g.edges(data=True))
    R = nx.to_scipy_sparse_matrix(g, weight='weight')
    Rp = R.toarray()
    Rp = w * Rp * 2.0 / Rp.sum()
    q = float(e_num - w) / (N - e_num)
    # Roundabout way of folding Rq into Rp: fill q where no edge exists
    for i, each in enumerate(Rq):
        for j, e in enumerate(each):
            if e == 0:
                Rp[i, j] = q
    for i in range(n):
        Rp[i, i] = 0  # remove q from the diagonal
    return Rp
def run(self):
    # Until there is no edge in the graph
    while len(self.G.edges()) != 0:
        # Find the edge with the highest betweenness
        edge = max(nx.edge_betweenness_centrality(self.G).items(),
                   key=lambda item: item[1])[0]
        # Remove that edge
        self.G.remove_edge(edge[0], edge[1])
        # Get the connected components
        components = [list(c) for c in nx.connected_components(self.G)]
        # When a new split appears, compute Q and record the maximum found so far.
        if len(components) != len(self.partition):
            # Compute Q
            currentQ = self.calculateQ(components, self.G_copy)
            if currentQ not in self.all_Q:
                self.all_Q.append(currentQ)
            if currentQ > self.max_Q:
                self.max_Q = currentQ
                self.partition = components

    print('The number of communities:', len(self.partition))
    print('Max_Q:', self.max_Q)
    print(self.partition)
def find_disjoint_graphs(my_graph):
    # Dictionary of edges with the calculated value of betweenness centrality
    edgeList = nx.edge_betweenness_centrality(my_graph)

    maxEdgeBetweenness = 0
    edgeNodes = ()
    # Loop over items and unpack each item, find maxEdgeBetweenness among all items.
    for node_id, edgeBetweennessVal in edgeList.items():
        if edgeBetweennessVal > maxEdgeBetweenness:
            maxEdgeBetweenness = edgeBetweennessVal
            edgeNodes = node_id

    print("Highest betweenness is %f - for the edge %s" % (maxEdgeBetweenness, edgeNodes,))

    # Remove the edge with highest betweenness
    my_graph.remove_edge(edgeNodes[0], edgeNodes[1])
    print("Removed edge %s" % (edgeNodes,))

    # Add the removed edge to the edges_removed list
    edges_removed.append(edgeNodes)

    num_of_connected_components = nx.number_connected_components(my_graph)
    print("Number of connected components (sub-graphs/communities) after removing edge %s = %d"
          % (edgeNodes, num_of_connected_components))

    # Draw and show the graph, with labels
    nx.draw_networkx(my_graph, pos=None, with_labels=True)
    plt.show()
def EBC_weights(g):
    w = nx.edge_betweenness_centrality(g)
    edges = [(u, v, w[(u, v)]) for u, v in w]
    _g = g.copy()
    _g.add_weighted_edges_from(edges)
    return nx.to_numpy_array(_g)
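# Usage sketch (illustrative): EBC_weights returns a dense adjacency-like
# matrix whose nonzero entries are the edge betweenness values of the input graph.
import networkx as nx

W = EBC_weights(nx.path_graph(4))
print(W[1, 2])  # betweenness of the middle edge of the path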
def girvan_newman_step(graph):
    '''
    INPUT: Graph G
    OUTPUT: None

    Run one step of the Girvan-Newman community detection algorithm.
    Afterwards, the graph will have one more connected component.
    '''
    size = nx.number_connected_components(graph)
    edges = graph.number_of_edges()
    biggroup = len(max(nx.connected_components(graph), key=len))
    cur = 0
    while cur <= size:
        most_connected = Counter(
            nx.edge_betweenness_centrality(graph, weight='sim')).most_common(1)[0][0]
        node1 = most_connected[0]
        node2 = most_connected[1]
        outitems = (node1, node2, size, edges, biggroup, time())
        outs = '%d, %d, %d, %d, %d, %f\n' % outitems
        print(outs)
        sys.stdout.flush()
        graph.remove_edge(*most_connected)
        cur = nx.number_connected_components(graph)
def whole_graph_metrics(graph, weighted=False):
    graph_metrics = {}
    # Use the 'weight' attribute only when a weighted analysis is requested
    weight = 'weight' if weighted else None

    # Shortest average path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weight)

    # Average eccentricity
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(list(ecc_dict.values())))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weight, count_zeros=True)

    # Average node betweenness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(list(avg_node_btwn_dict.values())))

    # Average edge betweenness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(list(avg_edge_btwn_dict.values())))

    # Number of isolates
    graph_metrics['isolates'] = len(list(nx.isolates(graph)))

    return graph_metrics
def test_balanced_tree(self):
    """Edge betweenness centrality: balanced tree"""
    G = nx.balanced_tree(r=2, h=2)
    b = nx.edge_betweenness_centrality(G, weight="weight", normalized=False)
    b_answer = {(0, 1): 12, (0, 2): 12, (1, 3): 6, (1, 4): 6, (2, 5): 6, (2, 6): 6}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n])
def communitySplits(self, graph):
    """
    Compute the splits for the formation of communities.

    Arguments:
        graph - A networkx graph or digraph.
    Returns:
        The graph with weak edges removed.
    """
    nConnComp = nx.number_connected_components(graph)
    nComm = nConnComp

    while nComm <= nConnComp:
        betweenness = nx.edge_betweenness_centrality(graph)
        if len(betweenness.values()) != 0:
            max_betweenness = max(betweenness.values())
        else:
            break
        for u, v in betweenness.items():
            if float(v) == max_betweenness:
                graph.remove_edge(u[0], u[1])
        nComm = nx.number_connected_components(graph)
    return graph
def edge_betweeness_centrality(X):
    """
    Based on the networkx function edge_betweenness_centrality.
    """
    XX = np.zeros(X.shape)
    for i, value in enumerate(X):
        adj_mat = value.reshape((int(np.sqrt(len(value))), -1))
        adj_mat = (adj_mat - np.min(adj_mat)) / (np.max(adj_mat) - np.min(adj_mat))
        adj_mat = 1 - adj_mat
        # th = np.mean(adj_mat) + 0.1
        # adj_mat = np.where(adj_mat < th, adj_mat, 0.)
        percent, th, adj_mat, triu = percentage_removed(adj_mat, 0.43)  # 43 #63 #73
        print("percent = {0}, threshold position = {1}, threshold = {2}\n".format(
            percent, th, triu[th]))

        g = nx.from_numpy_matrix(adj_mat)
        print("Graph Nodes = {0}, Graph Edges = {1}".format(
            g.number_of_nodes(), g.number_of_edges()))
        print("\nEdge kept ratio, {0}".format(
            float(g.number_of_edges()) / ((g.number_of_nodes() * (g.number_of_nodes() - 1)) / 2)))

        bet_cent = nx.edge_betweenness_centrality(g, weight='weight', normalized=True)
        edge_cent = np.zeros(adj_mat.shape)
        for k in bet_cent:
            edge_cent[k[0], k[1]] = bet_cent[k]
        XX[i] = edge_cent.reshape(-1)
        print("graph {0} => mean {1}, min {2}, max {3}".format(
            i, np.mean(XX[i]), np.min(XX[i]), np.max(XX[i])))

    return XX
def test_C4(self):
    """Edge betweenness centrality: C4"""
    G = nx.cycle_graph(4)
    b = nx.edge_betweenness_centrality(G, weight=None, normalized=True)
    b_answer = {(0, 1): 2, (0, 3): 2, (1, 2): 2, (2, 3): 2}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n] / 6)
def most_valuable_edge(G):
    """Returns the edge with the highest betweenness centrality
    in the graph `G`.
    """
    # We have guaranteed that the graph is non-empty, so this
    # dictionary will never be empty.
    betweenness = nx.edge_betweenness_centrality(G)
    return max(betweenness, key=betweenness.get)
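# Usage sketch (illustrative): this helper matches the `most_valuable_edge`
# hook of networkx's built-in Girvan-Newman implementation, so it can drive
# the standard divisive loop directly.
import networkx as nx
from networkx.algorithms.community import girvan_newman

G = nx.karate_club_graph()
first_split = next(girvan_newman(G, most_valuable_edge=most_valuable_edge))
print([sorted(c) for c in first_split])  # the first two communities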
def edge_betweenness(graph):
    # Returns the maximum of edge_betweenness_centrality.
    # Note: normalized must be passed by keyword; the second positional
    # argument of edge_betweenness_centrality is the sample size k.
    max_value = 0
    centrality = nx.edge_betweenness_centrality(graph, normalized=True)
    for v in centrality.values():
        if max_value < v:
            max_value = v
    return max_value
def edge_betweenness(edge_list=path + 'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list',
                     out_file=path + 'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-betweenness-10000.json'):
    G = nx.read_edgelist(edge_list, delimiter='\t')
    M = nx.edge_betweenness_centrality(G, k=10000)
    M_new = dict()
    for k, v in M.items():
        M_new[k[0] + '\t' + k[1]] = v
    json.dump(M_new, open(out_file, 'w'))
def edge_betweenness_centrality(f, ft, gnx):
    start = timer.start(ft, 'Edge Betweenness Centrality')
    result = nx.edge_betweenness_centrality(gnx)
    timer.stop(ft, start)
    for k in result:
        f.writelines(str(k) + ',' + str(result[k]) + '\n')
    return result
def edge_remove(G):
    dict1 = nx.edge_betweenness_centrality(G)
    list_tup = []
    for i in dict1:
        k = (i, dict1[i])
        list_tup.append(k)
    list_tup.sort(key=lambda x: x[1], reverse=True)
    return list_tup[0][0]
def split_graph(G):
    initial_communities = nx.number_connected_components(G)
    while initial_communities == nx.number_connected_components(G):
        betweenness = nx.edge_betweenness_centrality(G)
        edge_array = np.array([key for key, val in betweenness.items()])
        between_array = np.array([val for key, val in betweenness.items()])
        most_important_edge = edge_array[np.argmax(between_array)]
        G.remove_edge(most_important_edge[0], most_important_edge[1])
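# Usage sketch (illustrative): split_graph mutates the graph in place,
# removing highest-betweenness edges until the component count first increases.
import networkx as nx

G = nx.karate_club_graph()
split_graph(G)
print(nx.number_connected_components(G))  # 2 after the first split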
def _calc_edges_betweenness(G):
    # For each edge calculates the betweenness.
    edge_betweenness = dict.fromkeys(G.edges, 0)
    edge_betweenness_by_pair_of_nodes = nx.edge_betweenness_centrality(G)
    for key in edge_betweenness:
        edge_betweenness[key] = edge_betweenness_by_pair_of_nodes[key[:2]]
    # G.edges[list(G.edges)[0]]['betweenness']
    return edge_betweenness
def bc_filter(G):
    g = G.copy()
    # Invert weights so that strong ties act as short distances
    for u, v, d in g.edges(data=True):
        g[u][v]['inverse_weight'] = 1 / d['weight']
    # Store each edge's betweenness on the edge itself
    for (u, v), d in nx.edge_betweenness_centrality(g, weight='inverse_weight').items():
        g[u][v]['BC'] = d
    return g
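# Minimal usage sketch (assumes every edge carries a positive 'weight'
# attribute, since bc_filter inverts it): betweenness is computed over the
# inverted weights so that strong ties behave as short distances.
import networkx as nx

g = nx.Graph()
g.add_edge('a', 'b', weight=4.0)
g.add_edge('b', 'c', weight=1.0)
print(bc_filter(g)['a']['b']['BC'])  # betweenness stored on each edge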
def Betweenness_Edge_centrality(G, fn):
    '''
    G: Graph
    fn: function of n used to delimit the number of hotspots
    '''
    edgeBetDict = nx.edge_betweenness_centrality(G)
    edgeBetCentr = SortDictionary(edgeBetDict)[:int(fn)]
    return edgeBetCentr
def setCapacity(G, factor):
    if G.number_of_edges() > 0:
        bb = nx.edge_betweenness_centrality(G, normalized=True, weight='weight')
        for edge in G.edges(data=True):
            edge[2]['capacity'] = bb[(edge[0], edge[1])] * factor
    return G
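# Usage sketch (illustrative): scales each edge's normalized weighted
# betweenness into a 'capacity' attribute.
import networkx as nx

G = nx.path_graph(4)
nx.set_edge_attributes(G, 1.0, 'weight')  # assumed weight attribute
setCapacity(G, factor=10.0)
print(list(G.edges(data='capacity')))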
def test_balanced_tree(self):
    """Edge betweenness centrality: balanced tree"""
    G = nx.balanced_tree(r=2, h=2)
    b = nx.edge_betweenness_centrality(G, weight='weight', normalized=False)
    b_answer = {(0, 1): 12, (0, 2): 12, (1, 3): 6, (1, 4): 6, (2, 5): 6, (2, 6): 6}
    for n in sorted(G.edges()):
        assert_almost_equal(b[n], b_answer[n])
def get_betweenness_dictionary(edges, costs):
    # This is slow, and uses a graph library. (This is ALMOST what you're
    # implementing for the programming assignment!)
    G = nx.Graph()
    G.add_edges_from(edges)
    for u, v in G.edges:
        G[u][v]['cost'] = costs[u][v]
    centralities = nx.edge_betweenness_centrality(G, normalized=False, weight='cost')
    return centralities
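# Usage sketch (illustrative): `edges` is a list of pairs and `costs` a nested
# dict giving each edge's cost in the same orientation as the edge list, as
# the function above assumes.
edges = [('a', 'b'), ('b', 'c'), ('a', 'c')]
costs = {'a': {'b': 1.0, 'c': 5.0}, 'b': {'c': 1.0}}
print(get_betweenness_dictionary(edges, costs))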
def topology_experiment():
    # Originally written as `class Topology(object, nx.Graph)` with this loop
    # in the class body, which fails (MRO conflict) and runs at class creation;
    # restructured as a plain function.
    for N in [20, 30, 40]:
        for delta in [2, 4, 8]:
            fmax_vector = []
            for trial in range(5):
                nodes = range(N)
                np.random.seed(5)
                degree = [delta] * N
                G = nx.directed_havel_hakimi_graph(degree, degree)
                G = nx.DiGraph(G)
                bb = nx.edge_betweenness_centrality(G, normalized=False)
                nx.set_edge_attributes(G, bb, 'weight')
                nx.set_edge_attributes(G, bb, 'capacity')

                T_matrix = np.zeros((N, N))
                for s in nodes:
                    for d in nodes:
                        if s != d:
                            flow = np.random.uniform(0.5, 1.5)
                            T_matrix[s, d] = flow
                            if G.has_edge(s, d):
                                G[s][d]['weight'] = flow
                                G[s][d]['capacity'] = np.random.randint(8, 12)

                f_value = 0
                (p, a) = (0, 0)
                for i in range(N):
                    for j in range(N):
                        path = nx.shortest_path(G, i, j, weight='weight')
                        for k in range(len(path) - 1):
                            G[path[k]][path[k + 1]]['weight'] += T_matrix[i][j]
                        if i != j:
                            flow_value = nx.maximum_flow_value(G, i, j)
                            if flow_value > f_value:
                                f_value = flow_value
                                (p, a) = (i, j)

                fmax = 0
                (s_f, d_f) = (0, 0)
                for s, d, data in G.edges(data=True):
                    if data['weight'] > fmax:
                        fmax = data['weight']
                        (s_f, d_f) = (s, d)
                fmax_vector.append(fmax)
                tot_edges = G.number_of_edges()

            np.set_printoptions(precision=3)
            # print(T_matrix)
            # print(tot_edges)
            print('N = ' + str(N) + ' D = ' + str(delta) +
                  ' fmax = ' + str(np.mean(fmax_vector)))  # + ' flow = ' + str(flow_value)
def centralize_graph(graph, epb='lgth', efb='capa', ndg='capa', nec='capa', npr='capa'):
    """Compute edge centralities.

    Parameters
    ----------
    graph : original graph
    epb : edge property used for computation of edge path betweenness
    efb : " flow betweenness
    ndg : " degree centrality
    nec : " eigenvector centrality
    npr : " page rank

    Returns
    -------
    graphCentralities : graph with computed edge centralities
    """
    graphCentralities = graph.copy()
    edges = graphCentralities.edges(data=True)

    edgeCapacity = 1.0 * np.array([property['capa'] for node1, node2, property in edges])
    edgeCapacity /= edgeCapacity.sum()
    edgeLength = 1.0 / edgeCapacity
    for index, (node1, node2, property) in enumerate(edges):
        property['capa'] = edgeCapacity[index]
        property['lgth'] = edgeLength[index]

    edgeBetweenCentrality = nx.edge_betweenness_centrality(graphCentralities, weight=epb)
    edgeFlowBetweennessCentrality = nx.edge_current_flow_betweenness_centrality(
        graphCentralities, weight=efb)

    lineGraph = nx.line_graph(graphCentralities)
    degree = graphCentralities.degree(weight=ndg)
    for node1, node2, property in lineGraph.edges(data=True):
        intersectingNodes = list(set(node1).intersection(node2))[0]
        property[ndg] = degree[intersectingNodes]
    eigenvectorCentrality = nx.eigenvector_centrality_numpy(lineGraph, weight=ndg)
    pageRank = nx.pagerank(lineGraph, weight=ndg)
    degreeCentrality = dict(lineGraph.degree(weight=ndg))

    for index, (node1, node2, property) in enumerate(edges):
        edge = (node1, node2)
        if edge in edgeBetweenCentrality.keys():
            property['epb'] = edgeBetweenCentrality[edge]
        else:
            property['epb'] = edgeBetweenCentrality[edge[::-1]]
        if edge in edgeFlowBetweennessCentrality.keys():
            property['efb'] = edgeFlowBetweennessCentrality[edge]
        else:
            property['efb'] = edgeFlowBetweennessCentrality[edge[::-1]]
        if edge in degreeCentrality.keys():
            property['ndg'] = degreeCentrality[edge]
        else:
            property['ndg'] = degreeCentrality[edge[::-1]]
        if edge in eigenvectorCentrality.keys():
            property['nec'] = eigenvectorCentrality[edge]
        else:
            property['nec'] = eigenvectorCentrality[edge[::-1]]
        if edge in pageRank.keys():
            property['npr'] = pageRank[edge]
        else:
            property['npr'] = pageRank[edge[::-1]]

    return graphCentralities
def compute_edge_betweenness(g):
    mp.weight_graph(g)
    # Pass the edge attribute by keyword; the second positional argument of
    # edge_betweenness_centrality is the sample size k, not the weight.
    ebc = nx.edge_betweenness_centrality(g, weight='weight')
    m = 0
    for v in ebc.values():
        if v > m:
            m = v
    return ebc, m
def edge_betweenness_centrality(self, k=None):
    """
    Calculate the edge betweenness centrality of each pair of hyperedges
    in the mobile network. The algorithm uses `distance` to weight each segment.
    """
    road_bw = nx.edge_betweenness_centrality(self.graph, k=k, weight='distance')
    mobile_bw = {}
    for (source, target), betweenness in road_bw.items():
        if source in self.coordmapr and target in self.coordmapr:
            mobile_bw[(self.coordmapr[source], self.coordmapr[target])] = betweenness
    return mobile_bw
def find_best_edge(G0):
    """
    The networkx implementation of edge betweenness returns a dictionary.
    Sort its items by value and return the edge with the highest betweenness.
    """
    eb = nx.edge_betweenness_centrality(G0)
    eb_il = sorted(eb.items(), key=lambda x: x[1], reverse=True)
    return eb_il[0][0]
def Girvannewman(G):
    initialcomp = nx.number_connected_components(G)
    '''totalnumcomp = initialcomp
    while totalnumcomp <= initialcomp:'''
    bw = nx.edge_betweenness_centrality(G)
    maximum_value = max(bw.values())
    for key, value in bw.items():
        if float(value) == maximum_value:
            G.remove_edge(key[0], key[1])
    totalnumcomp = nx.number_connected_components(G)
def process_data(denom=100000, round=0):
    f = csv.reader(open("../applab_new_6.csv", 'r'), delimiter=',')
    db = nx.DiGraph()
    full_users = set()
    i = 0
    uniquect = 0
    for line in f:
        if i % 100000 == 0:
            print("processed", i, "lines")
        if i == 1000:
            break
        sender, receiver, date, time, duration, cost, location, region = \
            map(lambda x: x.strip(), line)
        if sender not in full_users:
            uniquect += 1
            full_users.add(sender)
            if uniquect <= 2:  # % denom - round == 0:
                db.add_node(sender)
                if db.has_node(receiver) == False:
                    db.add_node(receiver)
        else:
            if db.has_node(receiver) == False:
                db.add_node(receiver)
        if db.has_edge(sender, receiver):
            db[sender][receiver]['weight'] += int(duration)
        else:
            db.add_edge(sender, receiver, weight=int(duration))
        i += 1

    #pickle.dump(db, open("users_networkx.p" % str(round), "wb"))
    #print("degree assortativity coeff:", nx.degree_assortativity_coefficient(db))
    #print("average degree connectivity:", nx.average_degree_connectivity(db))
    #print("k nearest neighbors:", nx.k_nearest_neighbors(db))
    print("calculating deg cent")
    deg_cent = nx.degree_centrality(db)
    print("calculating in deg cent")
    in_deg_cent = nx.in_degree_centrality(db)
    print("calculating out deg cent")
    out_deg_cent = nx.out_degree_centrality(db)
    print("closeness cent")
    closeness_cent = nx.closeness_centrality(db)
    #print("betweenness cent")
    #btwn_cent = nx.betweenness_centrality(db)
    print("done")

    w = open("../output/user_network_stats.csv", 'w')
    w.write("uid,deg_cent,in_deg_cent,out_deg_cent,closeness_cent\n")
    for user in deg_cent.keys():
        try:
            w.write("%s,%s,%s,%s,%s\n" % (user, deg_cent[user], in_deg_cent[user],
                                          out_deg_cent[user], closeness_cent[user]))
        except:
            pass
    w.close()

    print("drawing...")
    nx.draw(db)
    plt.savefig("path.pdf")
    print("done!")

    print("edge betweenness centrality:", nx.edge_betweenness_centrality(db))
    print("communicability:", nx.communicability(db))
    print("communicability centrality:", nx.communicability_centrality(db))
def d3_graph(graph):
    node_bc = nx.betweenness_centrality(graph, weight="weight")
    node_dc = nx.degree_centrality(graph)
    edge_bc = nx.edge_betweenness_centrality(graph, weight="weight")
    # networkx 2.x argument order: values first, then the attribute name
    nx.set_edge_attributes(graph, edge_bc, 'betweenness')
    for node in graph.nodes():
        graph.nodes[node]["bw"] = node_bc[node]
        graph.nodes[node]["dc"] = node_dc[node]
    # print(graph.edges(data=True))
    d3graph = json_graph.node_link_data(graph)
    return json.dumps(d3graph)
def CmtyGirvanNewmanStep(G):
    init_ncomp = nx.number_connected_components(G)  # number of components
    ncomp = init_ncomp
    while ncomp <= init_ncomp:
        bw = nx.edge_betweenness_centrality(G, weight='weight')  # edge betweenness for G
        # Find the edge with max centrality
        max_ = max(bw.values())
        # Remove every edge with the highest centrality (there may be more than one!)
        for k, v in bw.items():
            if float(v) == max_:
                G.remove_edge(k[0], k[1])  # remove the central edge
        ncomp = nx.number_connected_components(G)  # recalculate the no of components
def calculte_betweenness(self, G, bonus=True):
    """
    Calculate betweenness.
    input:
        - G: graph
        - bonus: True to use my own betweenness calculator (bonus=True by default)
    """
    if bonus:
        betweenness = self.my_betweenness_calculation(G)
    else:
        betweenness = nx.edge_betweenness_centrality(G, k=None, normalized=True,
                                                     weight=None, seed=None)
    return betweenness
def _remove_max_edge(G, weight=None):
    """
    Removes edge with the highest value on betweenness centrality.

    Repeat this step until more connected components than the connected
    components of the original graph are detected.
    """
    number_components = nx.number_connected_components(G)
    while nx.number_connected_components(G) <= number_components and G.number_of_edges():
        betweenness = nx.edge_betweenness_centrality(G, weight=weight)
        max_value = max(betweenness.values())
        # Use a list of edges because G is changed in the loop
        for edge in list(G.edges()):
            if betweenness[edge] == max_value:
                G.remove_edge(*edge)
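# Usage sketch (illustrative): one divisive step on a copy of the graph; each
# call increases the number of connected components, provided edges remain.
import networkx as nx

G = nx.karate_club_graph().copy()
_remove_max_edge(G)
print(nx.number_connected_components(G))  # >= 2 after the first call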
def disintegrate(gr):
    components = list(nx.connected_components(gr))
    num_comps = len(components)
    num_nodes = nx.number_of_nodes(gr)
    yield components
    while num_comps < num_nodes:
        bw = nx.edge_betweenness_centrality(gr)            # betweenness dict
        to_remove = max(bw.keys(), key=(lambda x: bw[x]))  # edge with highest betweenness
        gr.remove_edge(*to_remove)                         # throw it away
        components = list(nx.connected_components(gr))
        new_num_comps = len(components)
        if new_num_comps > num_comps:
            num_comps = new_num_comps
            yield components
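# Usage sketch (illustrative): iterate the generator to watch the graph fall
# apart; the graph is modified in place, so pass a copy if the original must
# survive. A small barbell graph keeps the run short.
import networkx as nx

for components in disintegrate(nx.barbell_graph(4, 0)):
    print(len(components))  # component count each time it increases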
def gnewman(club, splitTo=2):
    itteration = 0
    # Why check the number of connected components? For an undirected graph,
    # a connected component is a subgraph in which any two vertices are
    # connected to each other by paths. That is useful here since we are
    # splitting a graph into subgraphs, i.e. a mathematical representation
    # of the splitting of the club.
    while nx.number_connected_components(club) < splitTo:
        # Returns the edges with their betweenness scores
        between = nx.edge_betweenness_centrality(club, normalized=False)
        # We want the edges with the highest edge betweenness centrality;
        # there might be ties, so just get the max betweenness
        m = max(between.values())
        # Unpack the tuple returned by between.items(): ((u, v), betweenness_score)
        for (hU, hV), val in between.items():
            # Check whether m (the max betweenness score) equals val,
            # removing ties along the way
            if val == m:
                club.remove_edge(hU, hV)
                print("removed edge %s--%s with betweenness score of %f" % (hU, hV, m))
                itteration += 1
        print("-------------------------")
    # This print-out shows the same metric described two different ways:
    # print(nx.number_connected_components(club), len(list(nx.connected_component_subgraphs(club))))
    print("total iterations %d for splitting into %d" % (itteration, splitTo))