def community_dection_graph(MSTgraph, num_comms=20, mst=True):
    """Collect the first ``num_comms`` Girvan-Newman partitions of a graph.

    Parameters
    ----------
    MSTgraph : object
        When ``mst`` is truthy, an object whose ``.graph`` attribute is the
        graph to partition; otherwise the graph itself.
    num_comms : int
        Maximum number of hierarchy levels to take from the generator.
    mst : bool
        Selects between ``MSTgraph.graph`` and ``MSTgraph``.

    Returns
    -------
    list
        One tuple per level; each tuple holds the sorted member list of
        every community found at that level.
    """
    target = MSTgraph.graph if mst else MSTgraph
    hierarchy = community.girvan_newman(
        target, most_valuable_edge=most_central_edge)
    return [
        tuple(sorted(members) for members in level)
        for level in itertools.islice(hierarchy, num_comms)
    ]
def gn_time(G):
    """Return the mean duration of ten Girvan-Newman modularity searches.

    Each run walks the Girvan-Newman hierarchy of ``G`` (at most
    ``len(G.edges)`` levels), scores every level with ``modularity`` and
    stops early once five consecutive levels fail to beat the best score
    seen so far.

    Parameters
    ----------
    G : graph
        Graph to benchmark. When ``nx.is_weighted(G)`` is true the edge to
        remove is chosen by weighted betweenness on the ``'weight'``
        attribute; otherwise the library default is used.

    Returns
    -------
    float
        ``np.mean`` of the ten measured durations, in seconds.
    """
    def most_central_edge(G):
        # Edge with the highest weighted betweenness score.
        centrality = betweenness(G, weight='weight')
        return max(centrality, key=centrality.get)

    algo_time = []
    for _ in tqdm(range(10)):
        # perf_counter is monotonic, so a system clock adjustment during the
        # run cannot distort (or even negate) the measured duration.
        start_time = time.perf_counter()

        if nx.is_weighted(G):
            solutions = girvan_newman(G, most_valuable_edge=most_central_edge)
        else:
            solutions = girvan_newman(G)

        # Upper bound on the number of hierarchy levels inspected.
        k = len(G.edges)
        max_score = 0
        # Consecutive levels without improvement (stopping criterion).
        count = 0
        for level in itertools.islice(solutions, k):
            solution = [sorted(c) for c in level]
            score = modularity(G, solution)
            if score > max_score:
                max_score = score
                count = 0
            else:
                count += 1
            if count == 5:
                break

        algo_time.append(time.perf_counter() - start_time)
    return np.mean(algo_time)
def get_girvan_newman(graph, num_components):
    """Return the last of the first ``num_components`` Girvan-Newman levels.

    The hierarchy generator is advanced at most ``num_components`` times and
    the final partition reached is returned. An empty tuple is returned when
    nothing is consumed (e.g. ``num_components == 0``).
    """
    hierarchy = community.girvan_newman(graph)
    last_partition = ()
    for last_partition in itertools.islice(hierarchy, num_components):
        pass  # only the final level is of interest
    return last_partition
def draw_community(g, position):
    """Draw ``g`` and colour the communities of its fourth Girvan-Newman level.

    Parameters
    ----------
    g : graph
        Graph to partition and draw.
    position : dict
        Node positions passed to the drawing calls so that the base drawing
        and the coloured overlays line up.

    Raises
    ------
    StopIteration
        If the hierarchy has fewer than four levels.

    Notes
    -----
    Prints the time, in seconds, spent computing the four hierarchy levels.
    The timer brackets the ``next`` calls because ``girvan_newman`` returns a
    lazy generator: creating it does no work.
    """
    palette = ['r', 'g', 'b', 'c', 'm', 'y']

    # time.clock() was removed in Python 3.8; perf_counter is its replacement.
    start_time = time.perf_counter()
    communities_generator = community.girvan_newman(g)
    for _ in range(3):
        next(communities_generator)  # skip the three coarser levels
    fourth_level = next(communities_generator)
    end_time = time.perf_counter()

    nx.draw(g, position, edge_color='k', with_labels=True,
            font_weight='light', node_size=280, width=0.9)
    plt.title("Graph generated with GN algorithm")
    for index, members in enumerate(fourth_level):
        # Wrap around the palette so more than six communities cannot
        # raise IndexError.
        nx.draw_networkx_nodes(g, position, nodelist=list(members),
                               node_color=palette[index % len(palette)])
    plt.show()
    print(end_time - start_time)
def get_communities(graph_object, iterations=5, print_communities=True):
    """
    Find communities in a graph with the Girvan-Newman method.

    graph_object: Graph to detect communities in
    iterations: Maximum number of Girvan-Newman levels to descend. The more
                iterations, the more aggressively communities are broken
                into smaller chunks. If the hierarchy has fewer levels, the
                deepest available one is used.
    print_communities: When true, print every community that has more than
                five members.
    return: Community map (dict) from node to community id. Communities
            with more than five members get id ``index + 1``; all smaller
            ones share id 0. Empty when no level was produced
            (e.g. ``iterations=0``).
    """
    import itertools

    # islice stops quietly when the hierarchy runs out, instead of letting
    # StopIteration escape from a bare next().
    communities = ()
    for communities in itertools.islice(
            co.girvan_newman(graph_object), iterations):
        pass  # keep only the deepest level reached

    communities_map = {}
    for ix, community_list in enumerate(communities):
        if len(community_list) > 5:
            if print_communities:
                print(community_list)
            node_id = ix + 1
        else:
            node_id = 0
        for node in community_list:
            communities_map[node] = node_id
    return communities_map
def girvan_newman_partition(graph):
    """Draw the first Girvan-Newman partition of ``graph``.

    Builds a dict mapping ``str(node)`` to the index of the community that
    contains it, then passes it to ``draw_graph`` with the title
    ``"Girvan Newman"``.

    ``girvan_newman`` yields one partition (a tuple of node sets) per
    hierarchy level. Only the first level is consumed here; enumerating the
    generator itself would key the dict by whole communities and store
    level numbers instead of community ids.
    NOTE(review): the first level is assumed to be the intended partition —
    confirm against what ``draw_graph`` expects.
    """
    first_level = next(girvan_newman(graph))
    res = {
        str(node): index
        for index, members in enumerate(first_level)
        for node in members
    }
    draw_graph(res, "Girvan Newman")
def best_split(wordPairs):
    """
    Build a graph from word pairs and return its best Girvan-Newman partition.

    Every hierarchy level is scored with the harmonic mean of its
    ``performance`` and ``coverage``; the level with the highest score wins.

    :param wordPairs: iterable of items whose first element is a pair of
        words (``item[0][0]``, ``item[0][1]``); each pair becomes an edge
    :return: (level of partition that gives the best score, best score,
        best partition); ``(None, 0, None)`` when there are no edges or no
        level scores above zero
    """
    from networkx.algorithms import community
    # NOTE(review): performance/coverage are absent from recent NetworkX
    # releases (partition_quality replaces them) — confirm the pinned version.
    from networkx.algorithms.community.quality import performance, coverage
    import networkx as nx

    Graph = nx.Graph()
    Graph.add_edges_from((pair[0][0], pair[0][1]) for pair in wordPairs)

    max_pc = 0
    max_index = None
    best_communities = None
    if Graph.number_of_edges() == 0:
        # Nothing to split; the quality measures are undefined here.
        return (max_index, max_pc, best_communities)

    for i, communities in enumerate(community.girvan_newman(Graph)):
        p = performance(Graph, communities)
        c = coverage(Graph, communities)
        # Both measures can be 0 (e.g. a single edge split into two
        # singletons); treat the harmonic mean as 0 rather than divide by 0.
        score = 2 * p * c / (p + c) if p + c else 0
        if score > max_pc:
            max_index = i
            max_pc = score
            best_communities = communities
    return (max_index, max_pc, best_communities)
def communityCalculation(self, GRAPH, reverseOrd):
    """Rank every community of the Girvan-Newman hierarchy by level.

    The full node set is recorded at level 0. At most
    ``GRAPH.number_of_nodes()`` hierarchy levels are then read, and each
    community is recorded with the number of the last level it appears in.

    Returns a list of ``(frozenset_of_nodes, level)`` pairs sorted by level,
    descending when ``reverseOrd`` is true.
    """
    started = time.time()
    hierarchy = community.girvan_newman(GRAPH)
    # NOTE(review): the generator is lazy, so this reports only the time
    # needed to create it — confirm whether that is what was meant.
    print("Calculating the communities in ...." + str(time.time() - started))

    communityLevel = {frozenset(GRAPH.nodes): 0}
    bounded = itertools.islice(hierarchy, GRAPH.number_of_nodes())
    for depth, partition in enumerate(bounded, 1):
        if self.cnf.verbose_log:
            print(tuple(sorted(c) for c in partition))
        for members in partition:
            communityLevel[frozenset(members)] = depth

    return sorted(communityLevel.items(),
                  key=operator.itemgetter(1),
                  reverse=reverseOrd)
def get_expanded_query(self, q, args):
    """Expand query ``q`` with top words from clusters of relevant documents.

    Steps:
      1. Fetch the relevant document ids for ``args[0]`` and their tf-idf
         representations.
      2. Build a graph with one node per document and a weighted edge
         between every pair whose similarity exceeds 0.5.
      3. Take the first Girvan-Newman partition; for each community with
         more than one document, collect each document's top word and keep
         the ``self.topw`` best via ``self.get_top_k``.
      4. Append the selected words whose lower-cased form is not already in
         the lower-cased query.

    Returns the expanded query as a single space-joined string.
    """
    qid = args[0]
    docids = self.get_topn_relevant_docids(qid)
    tfidfs = [self.get_tfidf(docid) for docid in docids]

    G = nx.Graph()
    n_docs = len(docids)
    for i in range(n_docs):
        G.add_node(docids[i])
        # Compare with every later document, including the last one
        # (the previous upper bound of n_docs - 1 skipped it).
        for j in range(i + 1, n_docs):
            sim = self.getsim(tfidfs[i], tfidfs[j])
            if sim > 0.5:
                G.add_edge(docids[i], docids[j], weight=sim)

    # One-off lookup table instead of a linear docids.index() per document;
    # setdefault keeps the first occurrence, exactly like index() did.
    tfidf_by_doc = {}
    for docid, tfidf in zip(docids, tfidfs):
        tfidf_by_doc.setdefault(docid, tfidf)

    selected_words = []
    comp = community.girvan_newman(G)
    for members in next(comp):
        if len(members) > 1:
            pairlist = [self.get_top_word(tfidf=tfidf_by_doc[p])
                        for p in sorted(members)]
            top_k = self.get_top_k(pairlist, self.topw)
            selected_words.extend(word for word, _value in top_k)

    query_splited = q.lower().split()
    for word in selected_words:
        if word.lower() not in query_splited:
            query_splited.append(word)
    return ' '.join(query_splited)
def apply_gn(g, subsize=1000):
    """Run three Girvan-Newman levels on ``g`` and report each of them.

    ``g`` is converted to an undirected graph first. For every level the
    partition is stored, its largest community is announced on stdout and
    passed to ``extract_info`` together with summary figures. Finally all
    stored levels are handed to ``evaluate_partition``.

    ``subsize`` is accepted but not used by this function.

    Returns a dict mapping the 1-based level number to its list of
    communities (each a tuple of nodes).
    """
    print('COMPUTING GIRVAN-NEWMAN SCORE')
    ntimes = 3
    g = g.to_undirected()
    gn_hierarchy = community.girvan_newman(g)

    iterations = {}
    for level in range(1, ntimes + 1):
        coms_gn = [tuple(members) for members in next(gn_hierarchy)]
        sizes = [len(c) for c in coms_gn]
        max_len = max(sizes)
        min_len = min(sizes)
        # max() returns the first community of maximal size.
        max_community = max(coms_gn, key=len)
        print('ON ITERATION ' + str(level) + ' GREATEST COMMUNITY COMPOSED'
              ' BY ' + str(max_len) + ' NODES')
        iterations[level] = coms_gn
        extract_info({
            'community': max_community,
            'fname': './results_ita/girvan_newman/it_' + str(level) + '_',
            'ncommunities': len(coms_gn),
            'maxcomlen': max_len,
            'mincomlen': min_len
        })

    evaluate_partition({
        'alg': 'girvan-newman',
        'network': g,
        'partition': iterations
    })
    return iterations
def community_algorithm(graph_edges, graph_nodes, threshold, max_depth, verbose):
    """
    Build a directed graph from thresholded edges and return its first
    Girvan-Newman partition.

    graph_nodes is a collection of nodes; all of them are added to the graph.
    graph_edges is a dictionary: key is an edge (pair of nodes), value is a
    list of deltas. An edge is added only when at least one of its deltas is
    below ``float(threshold)``; the number of such deltas is stored in the
    edge attribute ``count``.
    max_depth is accepted but not used by this function.
    verbose: values above 3 enable diagnostic printing.

    returns the first partition yielded by the generator, or None if the
    generator yields nothing
    """
    G = nx.DiGraph()
    G.add_nodes_from(list(graph_nodes))
    for edge, deltas in graph_edges.items():
        count = sum(1 for delta in deltas if delta < float(threshold))
        if count > 0:
            G.add_edge(edge[0], edge[1], count=count)

    communities_generator = community.girvan_newman(G)
    try:
        com = next(communities_generator)
    except StopIteration:
        if verbose > 3:
            print("No Community found: number nodes {}.".format(len(graph_nodes)))
        return None

    if verbose > 3:
        print("Number communities:", len(com))
        print("Communities:", com)
    return com
def girvan_newman(G, k, weight='weight'):
    '''Community detection using Girvan-Newman algorithm.

    Parameters
    ----------
    G : networkx.graph
    k : number of communities
    weight : edge attribute if G is weighted or None if G is unweighted.
        Only its None-ness is inspected here: any non-None value selects
        ``most_valuable_edge`` as the edge-removal rule.

    Returns
    -------
    list_communities : list
        A list of k sets, and each set contains vertices in one community.
        An empty list when ``k`` is smaller than 1 or larger than the number
        of nodes. For ``k == 1`` the single community is the whole node set.

    Notes
    -----
    This function only deals with undirected graph.
    The (k-1)th hierarchy level has exactly k communities only when G is
    connected; a disconnected G yields more.
    '''
    # k must be between 1 and the number of nodes, or return an empty list
    if k < 1 or k > len(G.nodes()):
        return []
    # No split requested: the hierarchy is not consulted at all. Previously
    # this case left the result unbound.
    if k == 1:
        return [set(G.nodes())]

    # determine most_valuable_edge according to weighted or not
    mvg = None if weight is None else most_valuable_edge
    communities = community.girvan_newman(G.to_undirected(),
                                          most_valuable_edge=mvg)
    # get (k-1)th community partition
    list_communities = []
    for com in itertools.islice(communities, k - 1):
        list_communities = list(com)
    return list_communities
def girvanNewmanWrapper(gwr, minNumCommunities):
    """Return the deepest Girvan-Newman level with at most
    ``minNumCommunities`` communities.

    Levels are read in order and reading stops at the first level that has
    more than ``minNumCommunities`` communities.

    Returns the last accepted partition (a tuple of node sets), or ``None``
    when even the first level is too large.
    """
    commGen = community.girvan_newman(gwr)
    commLevels = itertools.takewhile(
        lambda c: len(c) <= minNumCommunities, commGen)
    lastComm = None
    for comm in commLevels:
        # Keep the partition itself (the loop variable), not the
        # ``community`` module.
        lastComm = comm
    return lastComm
def _girvan_newman(self) -> list:
    """Label items by the Girvan-Newman community of their graph node.

    Reads the graph from ``global_variable.graph_path``, walks at most
    ``self._k - 1`` hierarchy levels (printing each one) and keeps the last.
    Every node of that partition, converted with ``int``, is used as an
    index into the label list.

    Returns a list with one entry per selected food item: the index of the
    community containing it, or 0 for items that appear in no community.
    """
    k = self._k - 1
    G = nx.read_edgelist(global_variable.graph_path)
    hierarchy = girvan_newman(G)

    result = ()
    for communities in itertools.islice(hierarchy, k):
        result = tuple(sorted(c) for c in communities)
        print(result)

    food_item = self.get_selected_feature(global_variable.food_item)
    label = [0] * len(food_item)
    for cluster_id, members in enumerate(result):
        for index in members:
            label[int(index)] = cluster_id
    return label
def __graph_construction(self, X):
    """Cluster labels via Girvan-Newman on an empirically built graph.

    The adjacency matrix is ``X.T.dot(X)`` passed through
    ``normalize_laplacian`` and reduced with ``triu``. The deepest hierarchy
    level that still has at most ``self.num_communities`` communities is
    used.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_labels)
        Matrix `X`.

    Returns
    -------
    community labels : numpy array of community indices, ordered by
    ascending node id
    """
    A = X.T.dot(X)
    A = normalize_laplacian(A=A, sigma=self.sigma,
                            return_adj=True, norm_adj=True)
    A = triu(A)

    # Create the graph
    G = nx.from_scipy_sparse_matrix(A=A)
    hierarchy = community.girvan_newman(G)
    limited = itertools.takewhile(
        lambda c: len(c) <= self.num_communities, hierarchy)
    for partition in limited:
        pass  # keep only the last admissible level

    # (community index, node id) pairs, ordered by node id.
    pairs = [(idx, int(node))
             for idx, members in enumerate(partition)
             for node in members]
    pairs.sort(key=lambda pair: pair[1])
    return np.array([idx for idx, _ in pairs])
def add_girvan_newman(graph, most_valuable_edge=None):
    """Write the top-level Girvan-Newman communities onto ``graph``.

    The hierarchy yields one partition per level; only the first (top-level)
    one is taken and handed to ``_nx_community_data_to_graph`` together with
    ``graph``.

    ``most_valuable_edge`` is forwarded unchanged to the algorithm.
    Returns the same ``graph`` object.
    """
    hierarchy = nx_community.girvan_newman(graph, most_valuable_edge)
    _nx_community_data_to_graph(graph, next(hierarchy))
    return graph
def community_generator(graph):
    """Return the second Girvan-Newman partition of ``graph``.

    The first (top-level) partition is computed and discarded.
    Raises ``StopIteration`` if the hierarchy has fewer than two levels.
    """
    hierarchy = community.girvan_newman(graph)
    next(hierarchy)  # top-level communities are not returned
    return next(hierarchy)
def split_graph(self, graph_to_split=None, parts=1):
    """Split a graph into community sub-graphs using Girvan-Newman.

    Two groups of graphs are appended to the result, in this order:

    1. One graph per community of the *second* hierarchy level. It holds the
       community's nodes plus every edge of ``self.G`` whose two endpoints
       are both in the community; the edge attribute ``weight`` is copied
       from ``self.G`` (0 when absent). Neighbours outside the community
       are reported on stdout as ``Missing``.
    2. One graph per hierarchy level for the first ``parts`` levels. It
       holds all nodes, each tagged with a ``type`` attribute of
       ``'Model'`` or ``'View'`` according to ``self.graph_type``, and no
       edges.

    Parameters
    ----------
    graph_to_split : graph, optional
        Graph to partition; defaults to ``self.G``.
    parts : int
        Number of hierarchy levels used for the second group.

    Returns
    -------
    list
        The generated ``nx.Graph`` objects.

    Raises
    ------
    StopIteration
        If the hierarchy has fewer than two levels.
    """
    if graph_to_split is None:
        graph_to_split = self.G
    multi_graph = []
    comp = girvan_newman(graph_to_split)

    # Second hierarchy level, computed on an independent generator so that
    # ``comp`` still starts at the first level below.
    second_level = community.girvan_newman(graph_to_split)
    next(second_level)
    next_level_communities = next(second_level)

    for lvl_community in sorted(map(sorted, next_level_communities)):
        community_graph = nx.Graph()
        community_graph.add_nodes_from(lvl_community)
        for node_in_community in lvl_community:
            try:
                for source, target in self.G.edges(node_in_community):
                    if (community_graph.has_node(source)
                            and community_graph.has_node(target)):
                        relation_weight = self.G[source][target].get(
                            'weight', 0)
                        # Connect the two endpoints. The previous call
                        # linked the node to the edge tuple itself and
                        # misspelled the attribute as 'weigth'.
                        community_graph.add_edge(
                            source, target, weight=relation_weight)
                    else:
                        print("Missing: {}".format(
                            target if community_graph.has_node(source)
                            else source))
            except Exception as e:
                # Best effort: report the failure and continue with the
                # next node.
                print(traceback.format_exc())
                print("error: {}".format(str(e)))
        multi_graph.append(community_graph)

    for communities in itertools.islice(comp, parts):
        community_graph = nx.Graph()
        for names in tuple(sorted(c) for c in communities):
            for name in names:
                node_type = ('Model'
                             if name in self.graph_type('Model',
                                                        graph_to_split)
                             else 'View')
                community_graph.add_node(name, type=node_type)
        multi_graph.append(community_graph)
    return multi_graph
def test_directed(self):
    """A directed 4-node path splits in three Girvan-Newman levels."""
    G = nx.DiGraph(nx.path_graph(4))
    levels = list(girvan_newman(G))
    assert_equal(len(levels), 3)
    first, second, third = levels
    # First removal cuts the path in the middle.
    validate_communities(first, [{0, 1}, {2, 3}])
    # Either half may be split next.
    validate_possible_communities(second,
                                  [{0}, {1}, {2, 3}],
                                  [{0, 1}, {2}, {3}])
    # Finally every node stands alone.
    validate_communities(third, [{0}, {1}, {2}, {3}])
def community_detection(edge_list=path+'connected-component-analysis/network-profiling-data/cid6_analysis/cid6-edge-list'):
    """Print how many communities the second Girvan-Newman level has.

    The graph is read from the tab-delimited edge list at ``edge_list``.
    Marked "too slow" by the original author.
    """
    # too slow...
    from networkx.algorithms import community
    G = nx.read_edgelist(edge_list, delimiter='\t')
    hierarchy = community.girvan_newman(G)
    next(hierarchy)  # skip the top-level partition
    second_level = next(hierarchy)
    ordered = sorted(sorted(members) for members in second_level)
    print(len(ordered))
def test_directed(self):
    """Check the three hierarchy levels of a directed 4-node path."""
    digraph = nx.DiGraph(nx.path_graph(4))
    hierarchy = list(girvan_newman(digraph))
    assert_equal(len(hierarchy), 3)
    top, middle, bottom = hierarchy
    # The path is first cut into two halves.
    validate_communities(top, [{0, 1}, {2, 3}])
    # Two symmetric outcomes are acceptable for the second cut.
    validate_possible_communities(middle,
                                  [{0}, {1}, {2, 3}],
                                  [{0, 1}, {2}, {3}])
    # The last level contains only singletons.
    validate_communities(bottom, [{0}, {1}, {2}, {3}])
def community_gn(G, weight_key='weight', **kwargs):
    """Return the first Girvan-Newman partition of ``G``.

    At each step the edge removed is the one with the highest score
    returned by ``betweenness(graph, weight=weight_key)``.

    ``kwargs`` is accepted but not used by this function.
    """
    def most_central_edge(graph):
        scores = betweenness(graph, weight=weight_key)
        return max(scores, key=scores.get)

    hierarchy = community.girvan_newman(
        G, most_valuable_edge=most_central_edge)
    return next(hierarchy)
def get_communities(graph, modularity=False, fluid=False):
    """Detect communities in ``graph`` with the selected algorithm.

    Parameters
    ----------
    graph : graph
        Graph to analyse.
    modularity : bool
        When true, use ``greedy_modularity_communities`` (takes precedence
        over ``fluid``).
    fluid : bool
        When true, use ``asyn_fluidc`` with one community per ten nodes
        (at least one).

    Returns
    -------
    The result of the chosen algorithm. With both flags false this is the
    Girvan-Newman generator, which yields one partition per level.
    """
    if modularity:
        return community.greedy_modularity_communities(graph)
    if fluid:
        # asyn_fluidc needs a positive integer k: floor-divide (true division
        # gives a float on Python 3) and never go below 1.
        # number_of_nodes() replaces the removed ``graph.node`` attribute.
        k = max(1, graph.number_of_nodes() // 10)
        return community.asyn_fluidc(graph, k)
    return community.girvan_newman(graph)
def create_clusters_from_girvannewman(G):
    """Wrap the first Girvan-Newman partition of ``G`` in a ``model``.

    Each community becomes a ``cluster`` named ``"girvan_newman"`` holding
    its sorted members and the entry of the module-level ``colors`` sequence
    at the community's position.
    """
    hierarchy = community.girvan_newman(G)
    partitions = [sorted(members) for members in next(hierarchy)]
    clusters = [
        cluster("girvan_newman", partition, colors[position])
        for position, partition in enumerate(partitions)
    ]
    return model("girvannewman", clusters)
def create_and_assign_communities(text_network):
    """Map every node to its community in the second Girvan-Newman level.

    The first level is computed and discarded. Returns a dict from node to
    the position of its community within the second-level partition.

    Raises ``StopIteration`` if the hierarchy has fewer than two levels.
    """
    logging.info("Assigning communities")
    communities_generator = community.girvan_newman(text_network)
    next(communities_generator)  # skip the top-level partition
    next_level_communities = next(communities_generator)
    # enumerate supplies the position directly; looking it up with
    # tuple.index() for every node was quadratic in the number of
    # communities.
    return {
        item: position
        for position, community_list in enumerate(next_level_communities)
        for item in community_list
    }
def top10_communities(
        self) -> Tuple[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]:
    """Return the ten largest communities of ``self.graph`` (cached).

    The Girvan-Newman hierarchy is descended until a level with at least
    ten communities is found, and the ten largest of that level are kept in
    ``self._top10_communities``. If the hierarchy ends before reaching ten
    communities, its last level is used and fewer than ten are returned.
    """
    if self._top10_communities is None:
        communities = ()
        # Iterating the generator ends cleanly when the hierarchy is
        # exhausted, whereas repeated next() calls let StopIteration escape.
        for communities in girvan_newman(self.graph):
            if len(communities) >= 10:
                break
        self._top10_communities = nlargest(10, communities, key=len)
    return self._top10_communities
def compute_girvan_newman_community_metrics(G):
    """Print and return the modularity of the second Girvan-Newman level.

    Girvan-Newman finds clusters by repeatedly removing links with high
    betweenness. The first level is computed and discarded; the second one
    is scored with ``community.modularity``.
    """
    hierarchy = community.girvan_newman(G)
    next(hierarchy)  # top-level partition is not scored
    second_level = next(hierarchy)
    modularity = community.modularity(G, second_level)
    print('Graph modularity based on Girvan Newmann clustering is {}'.format(
        modularity))
    return modularity
def girvan_newman(self, k):
    """Write every Girvan-Newman level with at most ``k`` communities to disk.

    The graph comes from ``build_rank_graph()``. For each accepted level the
    community count is printed to stdout and the partition (a tuple of
    sorted member lists) is written as one line to
    ``<parent of cwd>/data/girvan_newman.txt``. Reading stops at the first
    level with more than ``k`` communities.
    """
    graph = build_rank_graph()
    comp = alg.girvan_newman(graph)
    limited = itertools.takewhile(lambda c: len(c) <= k, comp)
    out_path = os.path.abspath('..') + "/data/girvan_newman.txt"
    # The context manager closes (and flushes) the file even if the
    # computation raises half-way through.
    with open(out_path, 'w') as f:
        for communities in limited:
            print("community count:", len(communities), ":")
            print(tuple(sorted(c) for c in communities), file=f)
            print(" ")
            print(" ")
def test_selfloops(self):
    """Self-loops on a 4-node path leave the expected hierarchy unchanged."""
    G = nx.path_graph(4)
    for node in (0, 2):
        G.add_edge(node, node)
    levels = list(girvan_newman(G))
    assert_equal(len(levels), 3)
    first, second, third = levels
    validate_communities(first, [{0, 1}, {2, 3}])
    validate_possible_communities(second,
                                  [{0}, {1}, {2, 3}],
                                  [{0, 1}, {2}, {3}])
    validate_communities(third, [{0}, {1}, {2}, {3}])
def get_communities_grivan_newman(G):
    """Return the first Girvan-Newman level whose communities are all small.

    A level qualifies when its largest community has fewer members than 10%
    of the nodes of ``G``, or fewer than 15 members. The size of the largest
    community is printed for every level inspected.

    Returns the qualifying partition as a list of node sets, or ``None``
    when no level qualifies.
    """
    # number_of_nodes() replaces the ``G.node`` attribute, which newer
    # NetworkX releases no longer provide. The bound does not change between
    # levels, so compute it once.
    relative_cap = 0.1 * G.number_of_nodes()
    for comm in community.girvan_newman(G):
        max_clique = max(len(x) for x in comm)
        print(max_clique)
        if max_clique < relative_cap or max_clique < 15:
            return list(comm)
    return None
def test_most_valuable_edge(self):
    """A custom edge selector drives the order in which edges are removed."""
    G = nx.Graph()
    G.add_weighted_edges_from([(0, 1, 3), (1, 2, 2), (2, 3, 1)])

    def heaviest(graph):
        # Let the most valuable edge be the one with the highest weight.
        return max(graph.edges(data='weight'), key=itemgetter(2))[:2]

    levels = list(girvan_newman(G, heaviest))
    assert_equal(len(levels), 3)
    first, second, third = levels
    validate_communities(first, [{0}, {1, 2, 3}])
    validate_communities(second, [{0}, {1}, {2, 3}])
    validate_communities(third, [{0}, {1}, {2}, {3}])
def test_selfloops(self):
    """Girvan-Newman on a 4-node path carrying two self-loops."""
    path = nx.path_graph(4)
    path.add_edges_from([(0, 0), (2, 2)])
    hierarchy = list(girvan_newman(path))
    assert_equal(len(hierarchy), 3)
    top, middle, bottom = hierarchy
    validate_communities(top, [{0, 1}, {2, 3}])
    validate_possible_communities(middle,
                                  [{0}, {1}, {2, 3}],
                                  [{0, 1}, {2}, {3}])
    validate_communities(bottom, [{0}, {1}, {2}, {3}])
def main(argv):
    """Print the highest modularity over all Girvan-Newman levels.

    The network is loaded from the path given in ``argv[1]``; every level of
    its hierarchy is scored with ``modularity`` and the maximum is printed.
    """
    g = read_network.read_static_network(argv[1])
    gn_output = list(girvan_newman(g))
    best = max(modularity(g, solution) for solution in gn_output)
    print('modularité maximale détectée par Girvan et Newman: ', best)
def test_undirected(self):
    """An undirected 4-node path splits in three Girvan-Newman levels."""
    # Start with the graph .-.-.-.
    G = nx.path_graph(4)
    levels = list(girvan_newman(G))
    assert_equal(len(levels), 3)
    first, second, third = levels
    # After one removal, we get the graph .-. .-.
    validate_communities(first, [{0, 1}, {2, 3}])
    # After the next, we get the graph .-. . ., but there are two
    # symmetric possible versions.
    validate_possible_communities(second,
                                  [{0}, {1}, {2, 3}],
                                  [{0, 1}, {2}, {3}])
    # After the last removal, we always get the empty graph.
    validate_communities(third, [{0}, {1}, {2}, {3}])
# In[17]: from networkx.algorithms import approximation # In[18]: from networkx.algorithms import community # In[19]: communities_generator = community.girvan_newman(G) # In[20]: top_level_communities = next(communities_generator) top_level_communities # In[72]: next_level_communities = next(communities_generator) next_level_communities
def test_no_edges(self):
    """An edgeless graph yields a single level of singleton communities."""
    G = nx.empty_graph(3)
    levels = list(girvan_newman(G))
    assert_equal(len(levels), 1)
    (only_level,) = levels
    validate_communities(only_level, [{0}, {1}, {2}])