def split_subslice_into_putative_modules(G_optimized, improvement_delta,
                                         modularity_score_objective,
                                         best_modularity):
    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity >= modularity_score_objective:
        return True, best_modularity
    # drop components that are too small to count as putative modules
    for n_nodes in cur_components:
        if len(n_nodes) < 4:
            G_optimized.remove_nodes_from(list(n_nodes))
    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    if len(cur_components) == 0:
        return True, best_modularity
    optimized_connected_components = girvan_newman(G_optimized)
    cur_components = sorted(next(optimized_connected_components))
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity <= best_modularity + improvement_delta:
        return True, best_modularity
    else:
        optimal_components = cur_components
        # remove every edge that crosses community boundaries
        edges_to_remove = []
        for cur_edge in G_optimized.edges:
            included = False
            for n_nodes in optimal_components:
                if cur_edge[0] in n_nodes and cur_edge[1] in n_nodes:
                    included = True
            if not included:
                edges_to_remove.append(cur_edge)
        G_optimized.remove_edges_from(edges_to_remove)
        return False, cur_modularity

def walktrap_algorithm(graph, t=5):
    # check that the graph is connected and undirected
    assert nx.is_connected(graph), "The graph must be connected"
    assert not nx.is_directed(graph), "The graph must be undirected"
    partitions = []
    modularities = []
    # initialization
    partition = singleton_partition(graph)
    modularity = nxq.modularity(graph, partition, weight='weight')
    partitions.append(deepcopy(partition))
    modularities.append(modularity)
    # number of iterations
    num_nodes = graph.number_of_nodes()
    # adjacency matrix, with self-loops added on the diagonal
    A = nx.to_numpy_matrix(graph, dtype=int)
    A += np.diag([1 for i in range(len(A))])
    # diagonal degree matrix
    D = nx.laplacian_matrix(graph) + A
    Ddiag = np.diagonal(D)
    Dd = np.diag(np.power(Ddiag, -0.5))
    # transition probability matrix P, taken to the power t
    P = inv(D) @ A
    P_t = matrix_power(P, t)
    nodes2id = nodes_to_ind(graph)
    for iteration in tqdm(range(num_nodes - 1)):
        # map community index to partition entry
        id2p = index_to_partition(partition)
        # compute distances between adjacent communities
        dist = adjacent_communities_dist(graph, partition, P_t, nodes2id)
        (ind1, ind2) = min(dist, key=dist.get)
        C1 = id2p[ind1]
        C2 = id2p[ind2]
        # union of the two closest communities
        C3 = C1.union(C2)
        # redefine the partition
        partition.remove(C1)
        partition.remove(C2)
        partition.append(C3)
        partitions.append(deepcopy(partition))
        modularities.append(nxq.modularity(graph, partition, weight='weight'))
    return list(reversed(partitions)), list(reversed(modularities))

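# walktrap_algorithm (and phase1/move_nodes_fast below) relies on a
# singleton_partition helper that is not shown here. A minimal sketch
# consistent with the list-of-sets partition format these functions expect
# (an assumption; the original helper may differ):
def singleton_partition(graph):
    # one community per node
    return [{node} for node in graph.nodes()]
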
def _naive_greedy_modularity_communities(G):
    """Find communities in a graph using greedy modularity maximization.

    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.
    """
    # First create one community for each node
    communities = list([frozenset([u]) for u in G.nodes()])
    # Track merges
    merges = []
    # Greedily merge communities until no improvement is possible
    old_modularity = None
    new_modularity = modularity(G, communities)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison
        old_modularity = new_modularity
        # Find best pair to merge
        trial_communities = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i=j and empty communities
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Merge communities u and v
                trial_communities[j] = u | v
                trial_communities[i] = frozenset([])
                trial_modularity = modularity(G, trial_communities)
                if trial_modularity >= new_modularity:
                    # Check if strictly better or tie
                    if trial_modularity > new_modularity:
                        # Found new best, save modularity and group indexes
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
                        # Break ties by choosing pair with lowest min id
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # If the best merge improves modularity, use it
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Remove empty communities and sort
    communities = [c for c in communities if len(c) > 0]
    for com in sorted(communities, key=lambda x: len(x), reverse=True):
        yield com

def _2ll(G, comms):
    '''Log-likelihood ratio test (LR test) normalized by the number of edges.

    H0 is the configuration model; H1 is the degree-corrected planted
    partition model.

    Args:
        G: input networkx graph instance
        comms: partition of the network, as a list of lists

    Returns:
        The log-likelihood ratio test statistic.
    '''
    E = G.number_of_edges()
    # win, wout: the MLE mixing parameters of the PPM
    try:
        map_comm = {v: i for i, c in enumerate(comms) for v in c}
        win, wout = mle_paras(G, map_comm)  # the MLE win and wout
        gamma = (win - wout) / (np.log(win) - np.log(wout))  # the MLE gamma
    except RuntimeWarning:
        # degenerate partitions make the MLE blow up; treat them as no signal
        return 0.

    # modularity of G under partition comms at resolution gamma
    mod = modularity(G, comms, gamma)

    # constants
    B = E * (np.log(win) - np.log(wout))
    C = E * (np.log(wout) - wout)

    return 2. * (B * mod + C + E) / E  # normalized by the number of edges

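# The `except RuntimeWarning` above only triggers if warnings are promoted to
# exceptions. A minimal sketch of the setup this function appears to assume
# (not shown in the original):
import warnings

warnings.filterwarnings("error", category=RuntimeWarning)
# now e.g. np.log(0.0) inside mle_paras raises instead of printing a warning
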
def iterate(self):
    while len(self.communities) > 1:
        C1, C2 = self.choose_communities()
        self.merge_communities(C1, C2)
        self.modularities[tuple(tuple(C) for C in self.communities)] = modularity(
            self.g, self.communities)

def export_log(G, communities, dataset, algorithm, d_threshold, w_threshold, path):
    '''Export the community result to a log file for manual analysis.'''
    with open(path, 'w') as f:
        # write some key information first
        line = "dataset: " + dataset + "\n"
        line += "algorithm: " + algorithm + "\n"
        line += "d_threshold: " + str(d_threshold) + "\n"
        line += "w_threshold: " + str(w_threshold) + "\n"
        line += "time: " + time.asctime(time.localtime(time.time())) + "\n"
        line += "-------------------------------------\n"
        line += "communities: " + str(len(communities)) + "\n"
        line += "modularity: " + str(round(modularity(G, communities), 3)) + "\n"
        line += "performance: " + str(round(performance(G, communities), 3)) + "\n"
        line += "=====================================\n"
        f.write(line)
        # write the communities line by line
        for community in communities:
            namelist = list(community)
            line = ", ".join(namelist)
            f.write(line + '\n')
    print("[Done] export log file:", path)

def move_node_to_other_com(graph, v, partition, initial_partition,
                           best_modularity, theta, T):
    """Computes the probabilities used by merge_nodes_subset and finds a new
    partition according to those probabilities."""
    prob = []
    new_partition = []
    ind_node = find_community_i(partition, v)
    for C, ind_com in T:
        partition_copy = deepcopy(partition)
        partition_copy = delete_from_com(partition_copy, ind_node, v)
        partition_copy = add_to_community(partition_copy, ind_com, v)
        new_com = partition_copy[ind_com]
        partition_copy = [s for s in partition_copy if s != set()]
        mod = nxq.modularity(graph, partition_copy, weight='weight')
        if mod > best_modularity and is_in_initial_partition(new_com, initial_partition):
            # weight the candidate move by its (exponentiated) modularity gain
            prob.append(np.exp((mod - best_modularity) / theta))
            best_modularity = mod
            best_partition = partition_copy
        else:
            prob.append(0)
        new_partition.append(partition_copy)
    return prob, new_partition

def get_best_partition(graph, best_partition, best_mod, node, part):
    """Gets the best partition by moving `node` into its neighbors' communities."""
    ind_node = find_community_i(part, node)
    neigh_node = graph.neighbors(node)
    # visit all neighbors of the node
    for neigh in neigh_node:
        # copy part so the initial partition is left unchanged
        part_bis = deepcopy(part)
        ind_neigh = find_community_i(part_bis, neigh)
        part_bis = delete_from_com(part_bis, ind_node, node)
        part_bis = add_to_community(part_bis, ind_neigh, node)
        part_bis = [s for s in part_bis if s != set()]
        # compute the modularity of the new partition
        mod = nxq.modularity(graph, part_bis, weight='weight')
        # keep the move if it improves modularity
        if mod > best_mod:
            best_mod = mod
            best_partition = part_bis
    return best_mod, best_partition

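# get_best_partition and the move functions above depend on find_community_i,
# delete_from_com and add_to_community, which are not shown here. Minimal
# sketches consistent with the list-of-sets partition format (assumptions;
# the original helpers may differ):
def find_community_i(partition, node):
    # index of the community that currently contains `node`
    for i, com in enumerate(partition):
        if node in com:
            return i
    return None


def delete_from_com(partition, ind_com, node):
    # drop `node` from community `ind_com` (may leave an empty set behind)
    partition[ind_com].discard(node)
    return partition


def add_to_community(partition, ind_com, node):
    # put `node` into community `ind_com`
    partition[ind_com].add(node)
    return partition
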
def get_gw_ami(G, t, gt):
    # G  -- graph
    # t  -- heat kernel scale parameter
    # gt -- ground truth labels
    distribution_exponent_hk = 0.001
    distribution_offset_hk = 0

    C1 = sgw.undirected_normalized_heat_kernel(G, t)
    p1 = sgw.node_distribution(G, distribution_offset_hk, distribution_exponent_hk)
    # Note that we are inserting prior information about the number of clusters
    p2 = np.ravel(
        GwGt.estimate_target_distribution({0: p1.reshape(-1, 1)},
                                          dim_t=len(np.unique(gt))))
    C2 = np.diag(p2)
    coup, log = ot.gromov.gromov_wasserstein(C1, C2, p1, p2,
                                             loss_fun='square_loss', log=True)
    est_idx = np.argmax(coup, axis=1)
    ami = metrics.adjusted_mutual_info_score(est_idx, gt, average_method='max')

    # turn the estimated labels into a list of communities
    comms = [set() for v in np.unique(est_idx)]
    for idx, val in enumerate(est_idx):
        comms[val].add(idx)
    mod = modularity(G, comms)
    return ami, mod

def move_nodes_fast(graph, partition):
    """Move nodes to their neighbors' communities to maximize the modularity."""
    # randomize the order of the visited nodes
    Q = list(graph.nodes())
    random.shuffle(Q)
    # initialize best modularity and partition
    best_modularity = nxq.modularity(graph, partition, weight='weight')
    best_partition = partition
    # visit all nodes at least once
    while len(Q) > 0:
        new_partition = deepcopy(best_partition)
        # get the next node and its neighbors
        next_node = Q.pop(0)
        neigh_node = graph.neighbors(next_node)
        ind_node = find_community_i(new_partition, next_node)
        # visit all neighbors
        for neigh in neigh_node:
            partition_copy = deepcopy(new_partition)
            ind_neigh = find_community_i(partition_copy, neigh)
            partition_copy = delete_from_com(partition_copy, ind_node, next_node)
            partition_copy = add_to_community(partition_copy, ind_neigh, next_node)
            partition_copy = [s for s in partition_copy if s != set()]
            mod = nxq.modularity(graph, partition_copy, weight='weight')
            if mod > best_modularity:
                best_modularity = mod
                best_partition = partition_copy
                new_ind_node = find_community_i(partition_copy, next_node)
                neigh_left = get_neighbors_not_in_com(graph, new_ind_node,
                                                      partition_copy, next_node)
                neigh_not_in_Q = [n for n in neigh_left if n not in Q]
                # add those neighbors to Q again
                Q += neigh_not_in_Q
    return best_partition, best_modularity

def detect_communities(g: nx.Graph,
                       max_it: int = 100,
                       eps: float = 0.0001,
                       reruns_if_not_conv: int = 5,
                       threshold: float = 0.005,
                       q_max: int = 7):
    # Determine the optimal number of communities and run community detection
    # for a given network. The nodes have to be labeled from 0 to n.
    modularity_0 = 0
    modularity_1 = threshold
    q = 1
    c = 2 * g.number_of_edges() / g.number_of_nodes()
    partition = ()
    # Run belief-propagation community detection with an increasing number of
    # communities until the modularity of the detected partition does not
    # increase by more than the given threshold.
    while modularity_1 - modularity_0 >= threshold:
        old_partition = partition
        beta = compute_opt_beta(q, c)
        modularity_0 = modularity_1
        partition = run_bp_community_detection(
            g=g,
            q=q,
            beta=beta,
            max_it=max_it,
            eps=eps,
            reruns_if_not_conv=reruns_if_not_conv)
        modularity_1 = modularity(
            g, [{i for i in range(len(partition[1])) if partition[1][i] == j}
                for j in set(partition[1])])
        if not partition[4]:
            # the run did not converge; fall back to the previous partition
            curr_partition = partition
            partition = old_partition
            modularity_1 = modularity_0
            modularity_0 = modularity_1 - threshold
        if q == 1:
            modularity_0 = modularity_1 - threshold
        if q > q_max:
            break
        print(q)  # progress
        q = q + 1
    if len(old_partition) != 0:
        return (q - 1, modularity_0, old_partition[1], old_partition[2],
                old_partition[3], old_partition[4])
    else:
        return (q - 1, modularity_0, curr_partition[1], curr_partition[2],
                curr_partition[3], curr_partition[4])

def projected_oxygen_graph_metrics(projected_graph):
    communities = community.greedy_modularity_communities(projected_graph)
    mod_score = modularity(projected_graph, communities)
    try:
        aspl = nx.average_shortest_path_length(projected_graph)
        wiener = nx.wiener_index(projected_graph)
    except Exception:
        # disconnected graphs raise here; flag the metrics with -1
        aspl = -1
        wiener = -1
    return aspl, wiener, len(communities), communities, mod_score

def agglomerative_modularity(G):
    modularities = []
    # initial grouping where each node is in its own group
    best_partitions = [frozenset([n]) for n in G.nodes()]
    prev_modularity = -1000
    # modularity takes the groupings as a set of node sets of G
    # representing a partitioning
    curr_modularity = modularity(G, best_partitions)
    while curr_modularity > prev_modularity:
        merges = []
        prev_modularity = curr_modularity
        test_partitions = list(best_partitions)
        for i, g1 in enumerate(best_partitions):
            for j, g2 in enumerate(best_partitions):
                # Skip i=j and empty communities
                if j <= i or len(g1) == 0 or len(g2) == 0:
                    continue
                test_partitions[j] = g1 | g2
                test_partitions[i] = frozenset([])
                test_modularity = modularity(G, test_partitions)
                if test_modularity > curr_modularity:
                    curr_modularity = test_modularity
                    # record the merge as a tuple with delta Q as the first
                    # element and the potential merged partition as the second
                    merges.append((curr_modularity - prev_modularity,
                                   copy.deepcopy(test_partitions)))
                test_partitions[i] = g1
                test_partitions[j] = g2
        # in this implementation, tie breaking is first come first serve
        if len(merges) > 0:
            best_partitions = sorted(merges, key=lambda x: x[0], reverse=True)[0][1]
        else:
            best_partitions = test_partitions
        modularities.append(modularity(G, best_partitions))
    partitions = [
        frozenset(g)
        for g in sorted([g for g in best_partitions if len(g) > 0],
                        key=lambda x: len(x), reverse=True)
    ]
    return partitions, modularities

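# A minimal usage sketch (an assumption, not part of the original file): run
# the agglomerative merging on the karate club graph and report the quality
# of the final partition.
import networkx as nx

demo_graph = nx.karate_club_graph()
demo_parts, demo_mods = agglomerative_modularity(demo_graph)
print(len(demo_parts), "communities, final modularity", round(demo_mods[-1], 3))
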
def calc_modularity(G, communities):
    """Compute the modularity of the graph.

    :param G: networkx graph
    :param communities: iterable of node groups
    :return: modularity of G under the given partition
    """
    comms = []
    for com in communities:
        comms.append(set(com))
    return modularity(G=G, communities=comms)

def girvan_newman(G, k, weight='weight', autothreshold=False):
    '''Community detection using the Girvan-Newman algorithm.

    Parameters
    ----------
    G : networkx.graph
    k : number of communities
    weight : edge attribute if G is weighted or None if G is unweighted
    autothreshold : threshold automatically according to the modularity value

    Returns
    -------
    list_communities : list
        A list of k sets, and each set contains vertices in one community.

    Notes
    -----
    This function only deals with undirected graphs.
    '''
    # determine most_valuable_edge according to whether G is weighted
    mvg = None if weight is None else most_valuable_edge
    communities = community.girvan_newman(G.to_undirected(),
                                          most_valuable_edge=mvg)
    if not autothreshold:
        # k must not be larger than the number of nodes, or return an empty set
        if k > len(G.nodes()):
            return []
        # get the (k-1)-th community partition
        for com in itertools.islice(communities, k - 1):
            list_communities = list(com)
    else:
        # find the list_communities that yields the maximum modularity
        max_modularity = float('-inf')
        for com in itertools.islice(communities, k - 1):
            cur_list_communities = list(com)
            cur_modularity = quality.modularity(G, cur_list_communities)
            if cur_modularity > max_modularity:
                list_communities = cur_list_communities
                max_modularity = cur_modularity
    return list_communities

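# A minimal usage sketch (an assumption, not part of the original module,
# which already imports itertools, community and quality):
import networkx as nx

demo_G = nx.karate_club_graph()
# scan the first k-1 splits and keep the partition of maximum modularity
demo_comms = girvan_newman(demo_G, k=5, weight=None, autothreshold=True)
print([len(c) for c in demo_comms])
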
def calc_modularity(mdl, gtype='parameter'):
    """
    Computes graph modularity given a graph representation of model mdl.

    Parameters
    ----------
    mdl : model or graph
    gtype : str, optional
        Graph representation to extract when a model is given
        (default 'parameter').

    Returns
    -------
    modularity : float
        Modularity of the greedy-modularity partition.
    """
    if isinstance(mdl, nx.Graph):
        g = mdl
    else:
        g = get_graph(mdl, gtype)
    communities = list(greedy_modularity_communities(g))
    m = modularity(g, communities)
    return m

def clustering_statistics(self, community_partition, feat_name, feat_desc, feat_interpret):
    """Compute the quality of the community partition."""
    compl_desc = " of the partition of " + feat_desc

    self.add_feature(
        feat_name + "_modularity",
        lambda graph: quality.modularity(graph, community_partition),
        "Modularity" + compl_desc,
        feat_interpret,
    )
    self.add_feature(
        feat_name + "_coverage",
        lambda graph: quality.coverage(graph, community_partition),
        "Coverage" + compl_desc,
        feat_interpret,
    )
    self.add_feature(
        feat_name + "_performance",
        lambda graph: quality.performance(graph, community_partition),
        "Performance" + compl_desc,
        feat_interpret,
    )
    self.add_feature(
        feat_name + "_inter_community_edges",
        lambda graph: quality.inter_community_edges(graph, community_partition),
        "Inter community edges" + compl_desc,
        feat_interpret,
    )
    self.add_feature(
        feat_name + "_inter_community_non_edges",
        lambda graph: quality.inter_community_non_edges(graph, community_partition),
        "Inter community non edges" + compl_desc,
        feat_interpret,
    )
    self.add_feature(
        feat_name + "_intra_community_edges",
        lambda graph: quality.intra_community_edges(graph, community_partition),
        "Intra community edges" + compl_desc,
        feat_interpret,
    )

def merge_nodes_subset(graph, partition, initial_partition, subset, theta):
    """From the initial refined partition, merges subsets only if those
    subsets are a subset of the communities from the initial partition."""
    R = get_connected_nodes(graph, subset)
    best_modularity = nxq.modularity(graph, partition, weight='weight')
    for v in R:
        ind_community = find_community_i(partition, v)
        # only singleton communities are considered for a merge
        if len(partition[ind_community]) == 1:
            T = get_connected_communities(graph, subset, partition)
            prob, new_partition = move_node_to_other_com(
                graph, v, partition, initial_partition, best_modularity, theta, T)
            # sample a move only if at least one candidate has nonzero weight
            if prob.count(0) != len(prob):
                partition = random.choices(new_partition, weights=prob)[0]
    return partition

def phase1(graph):
    """Gets the best partition by greedily maximizing the modularity function."""
    # initialize the communities: each node in a different community
    partition = singleton_partition(graph)
    # initialize the best modularity to spot convergence
    best_mod = nxq.modularity(graph, partition, weight='weight')
    best_partition = partition
    nodes = list(graph.nodes())
    random.shuffle(nodes)
    while True:
        for node in nodes:
            part = deepcopy(best_partition)
            best_mod, best_partition = get_best_partition(
                graph, best_partition, best_mod, node, part)
        # stop once a pass over the nodes leaves the partition unchanged
        if part == best_partition:
            break
    return best_partition, best_mod

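# A minimal sketch of running phase 1 on a toy graph, assuming the helper
# sketches above plus the module-level imports the original file relies on
# (random, deepcopy, and networkx.algorithms.community.quality as nxq):
import networkx as nx

toy_graph = nx.barbell_graph(5, 0)
toy_partition, toy_mod = phase1(toy_graph)
print(len(toy_partition), "communities, modularity", round(toy_mod, 3))
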
def label_propagation(G, weight='weight', iterNum=6):
    '''Community detection using the label propagation algorithm.

    Parameters
    ----------
    G : networkx.graph
    weight : edge attribute if G is weighted or None if G is unweighted
    iterNum : number of times to repeat the label propagation algorithm

    Returns
    -------
    list_communities : list
        A list of sets, and each set contains vertices in one community.

    Notes
    -----
    This function only deals with weighted and unweighted undirected graphs.
    '''
    # H is the undirected version of graph G
    H = G.to_undirected()
    max_modularity = float('-inf')
    # keep the run with the highest modularity
    for i in range(iterNum):
        if weight is None:
            cur_list_communities = list(community.label_propagation_communities(H))
        else:
            cur_list_communities = list(community.asyn_lpa_communities(H, weight=weight))
        cur_modularity = quality.modularity(H, cur_list_communities)
        if cur_modularity > max_modularity:
            list_communities = cur_list_communities
            max_modularity = cur_modularity
    return list_communities

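# A minimal usage sketch (an assumption, not part of the original module):
# pick the best of iterNum label propagation runs by modularity.
import networkx as nx

lp_demo_G = nx.karate_club_graph()
lp_demo_comms = label_propagation(lp_demo_G, weight=None, iterNum=6)
print(len(lp_demo_comms), "communities")
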
def main():
    # Column name
    col_name = "ALGORITHM_cmty"
    # Load data
    if path.exists("../data/cmty_nodes.csv"):
        node_upload = "../data/cmty_nodes.csv"
    elif path.exists("../data/nodes.csv"):
        node_upload = "../data/nodes.csv"
    else:
        print("NO NODES TO UPLOAD!")
        assert False
    pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)
    # Data in nice form
    headers = list(pd_nodes.columns)
    nodes = np.asarray(pd_nodes)

    # Aggregate file names
    model_names = ["GAT", "GCN", "GraphSage"]
    npy_names = ["../data/" + x + "_node_embeddings.npy" for x in model_names]
    model_cmtys = []
    model_time = []
    for i in range(len(npy_names)):
        # Load embeddings
        embeddings = np.load(npy_names[i])
        print(embeddings.shape)
        # Generate node_mapping for clusters
        start = timeit.default_timer()
        ##########################################
        # CODE HERE to cluster embeddings and creating node_mapping #
        # node_mapping can either be dictionary or array            #
        ##########################################
        node_mapping = np.zeros(len(nodes)).astype(int)
        ##########################################
        stop = timeit.default_timer()
        model_time.append(stop - start)
        # Convert node_mapping to cmtys and node_to_cmty array
        # num_cmtys = len(set(node_mapping.values()))
        num_cmtys = len(set(node_mapping))
        cmtys = [[] for _ in range(num_cmtys)]
        node_to_cmty = np.zeros(len(node_mapping)).astype(int)
        for j in range(len(node_to_cmty)):
            node_to_cmty[j] = node_mapping[j]
            cmtys[node_mapping[j]].append(j)
        model_cmtys.append(cmtys)
        # Add communities to nodes
        pd_nodes[model_names[i] + "_" + col_name] = node_to_cmty
        pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')

    print("Creating Graph")
    # Load social network accordingly
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from(list(map(tuple, edges)))

    print("Calculating modularity")
    for i in range(len(model_names)):
        assert is_partition(G, model_cmtys[i])
        modul = modularity(G, model_cmtys[i])
        print("Results from " + model_names[i] + " ALGORITHM:")
        print("Modularity:", modul)
        print("Number of clusters:", len(model_cmtys[i]))
        print("Time elapsed:", model_time[i])

def girvan_newman_best_partition(G, list_partitions):
    """
    Returns the best partition in `list_partitions`.

    Returns the best partition among those generated by the Girvan-Newman
    algorithm. The best partition is selected according to modularity,
    computed using `networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        `G` must meet 2 conditions:
        1. `G` must contain only one connected component
        2. The nodes must be integers from 0 to (number_of_nodes - 1)
    list_partitions : list
        List of (number_nodes - 1) lists got using `girvan_newman_partitions`.
        Each list contains the information about the partition of that level.

    Returns
    -------
    tuple
        Tuple of 2 elements:
        First element: list with information about the best partition.
        It is a list of sets of nodes, each set of nodes is a community.
        Second element: integer, position of the partition in
        `list_partitions` which corresponds to the best partition.

    Raises
    ------
    TypeError
        If `G` does not meet the conditions:
        1. `G` must contain only one connected component
        2. The nodes must be integers from 0 to (number_of_nodes - 1)

    Example
    -------
    To get the best partition of `G` among those detected by the
    Girvan-Newman algorithm::

        >>> G = nx.path_graph(6)
        >>> partitions = girvan_newman_partitions(G)
        >>> bp_G, index_bp_G = girvan_newman_best_partition(G, partitions)
        >>> print(bp_G)
        ... [{0, 1, 2}, {3, 4, 5}]
        >>> print(index_bp_G)
        ... 0

    To plot the dendrogram of community detection performed on graph G,
    highlighting the best partition::

        >>> from scipy.cluster.hierarchy import dendrogram
        >>> # Create graph and perform community detection with Girvan-Newman
        >>> G = nx.path_graph(6)
        >>> partitions = girvan_newman_partitions(G)
        >>> # Compute the agglomerative matrix
        >>> agglomerative_mat = agglomerative_matrix(G, partitions)
        >>> # Find the best partition and its distance from the ground level
        >>> bp_G, idx_bp_G = girvan_newman_best_partition(G, partitions)
        >>> n_communities_bp = len(bp_G)
        >>> dis_bp = distance_of_partition(agglomerative_mat, n_communities_bp)
        >>> # Plot the dendrogram highlighting the best partition
        >>> dendro_bp = dendrogram(agglomerative_mat, color_threshold=dis_bp)
    """
    # Does G meet the conditions?
    if nx.number_connected_components(G) > 1:
        raise TypeError("Bad graph type: do not use a graph with more"
                        " than one connected component")
    _nodes = nx.nodes(G)
    nn = nx.number_of_nodes(G)
    _good_nodes = np.arange(nn)
    if not set(_nodes) == set(_good_nodes):
        raise TypeError("Bad graph type: use a graph with nodes which"
                        " are integers from 0 to (number_of_nodes - 1)")

    # Look for the best partition
    best_partition = []
    MAX_mod = -99
    c = 0
    for part in list_partitions:
        # Compute modularity
        tmp_mod = modularity(G, part)
        # If modularity increases, then update `best_partition`
        if tmp_mod > MAX_mod:
            MAX_mod = tmp_mod
            best_partition = part
            id_best_part = c
        c += 1
    return (best_partition, id_best_part)

def modularity(self, communities):
    return modularity(self.graph, communities)

import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms.community.quality import modularity
from sklearn.metrics.cluster import normalized_mutual_info_score

G = nx.read_adjlist('karate_edges_77.txt')

# Let each node in the graph be in its own community
communities = list()
for i in G.nodes():
    communities.append(set([i]))

# Create a list for keeping track of all merges
tracking_merges = list()

modnew = modularity(G, communities)
print('The modularity at the beginning is', modnew)
modold = None
comtrial = []
modtrial = 0
num_of_merges = 0
num_merges = []
modularity_scores = []

# Maximizing the modularity to find the best social partition
while (modold is None or modnew > modold):
    comtrial = list(communities)
    modold = modnew
    # print('The current modularity is', modold)
    to_be_merged = None
    for i, x in enumerate(communities):
## --------------------- print for label propagation result
G_treated = label_prop(G, max_iter=100)
labels = [G_treated.nodes[node]["label"] for node in G_treated.nodes]
# print(labels)
labels = list(set(labels))
partitions = []
for label in labels:
    partitions.append(
        set([
            node for node in G_treated.nodes
            if G_treated.nodes[node]["label"] == label
        ]))
# start = time.time()
print('modularity, coverage, performance : ',
      modularity(G_treated, partitions),
      coverage(G_treated, partitions),
      performance(G_treated, partitions))
# end = time.time()
# print(end-start)

## --------------------- print for louvain result
# start = time.time()
partition = community_louvain.best_partition(G)
# print(partition)
labels = [partition[node] for node in G.nodes]
labels = list(set(labels))
partitions = []
for label in labels:
    partitions.append(
        set([node for node in G.nodes if partition[node] == label]))
# print(modularity(partitions, G))

t = 20
cost = sgw.undirected_normalized_heat_kernel(G, t)

d_gws = []
mis = []
coverages = []
modularities = []
for j in num_clusts:
    mutual_info, d_gw, coup = process_sgwl_eu(cost, database, num_nodes, j)
    partition = get_partition(coup)
    mis.append(mutual_info)
    d_gws.append(d_gw)
    coverages.append(coverage(G, partition))
    modularities.append(modularity(G, partition))

# Estimate the number of clusters
estimated_clusters_raw_sym = num_clusts[np.argmax(modularities)]
print('Number of Clusters:', estimated_clusters_raw_sym)

# Now perform the modularity/coverage maximizing pipeline
ts = np.linspace(3, 10, 40)
mis, coups, d_gws, good_t_max, good_t_grad, rt = t_selection_pipeline_undirected_eu(
    G, ts, estimated_clusters_raw_sym)

coverages = []
for j in range(len(ts)):
    coup = coups[j]
    partition = get_partition(coup)
def modularity(self, graph):
    communities = greedy_modularity_communities(nx.Graph(graph))
    # round to one decimal place and clamp at a floor of 0.1
    mod = round(quality.modularity(graph, communities), 1)
    if mod < 0.1:
        mod = 0.1
    return mod

def greedy_modularity_communities(G, weight=None, resolution=1):
    r"""Find communities in G using greedy modularity maximization.

    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_.
    This method currently supports the Graph class.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    weight : string or None, optional (default=None)
        The name of an edge attribute that holds the numerical value used
        as a weight. If None, then each edge has weight 1.
        The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] M. E. J Newman "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    """
    # Count nodes and edges
    N = len(G.nodes())
    m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)])
    q0 = 1.0 / (2.0 * m)

    # Map node labels to contiguous integers
    label_for_node = {i: v for i, v in enumerate(G.nodes())}
    node_for_label = {label_for_node[i]: i for i in range(N)}

    # Calculate degrees
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists
    communities = {i: frozenset([i]) for i in range(N)}
    merges = []

    # Initial modularity
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition, resolution=resolution)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edges within community i
    # dq_dict[i][j]: dQ for merging community i, j
    # dq_heap[i][n] : (-dq, i, j) for community i nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    a = [k[i] * q0 for i in range(N)]
    dq_dict = {
        i: {
            j: 2 * q0 * G.get_edge_data(i, j).get(weight, 1.0)
            - 2 * resolution * k[i] * k[j] * q0 * q0
            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i
        }
        for i in range(N)
    }
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
    ]
    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq

        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - {i, j}
        both_set = i_set & j_set

        # Merge i into j and update dQ
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                dq_jk = dq_dict[j][k] - 2.0 * resolution * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * resolution * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)
        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    communities = [
        frozenset([label_for_node[i] for i in c]) for c in communities.values()
    ]
    return sorted(communities, key=len, reverse=True)

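# A minimal usage sketch (an assumption, not part of the original module):
# with generalized modularity, a larger `resolution` tends to produce more,
# smaller communities.
import networkx as nx

cnm_demo_G = nx.karate_club_graph()
for demo_gamma in (0.5, 1, 2):
    demo_comms = greedy_modularity_communities(cnm_demo_G, resolution=demo_gamma)
    print(demo_gamma, len(demo_comms))
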
# Conductance
sumOfCond = []
for i in range(len(cluster)):
    sumOfCond.append(conductance(new_graph, cluster[i]))
condScoreS = conductance(new_graph, S)
condScoreT = conductance(new_graph, T)
overallCond = min(sumOfCond)

# Modularity communities
barbMod = list(greedy_modularity_communities(new_graph))
# Modularity score
barbModScore = qu.modularity(new_graph, barbMod)

# Edge betweenness centrality
barbedgeBetweenness = nx.edge_betweenness_centrality(new_graph, None, False)
barbaverageEdge = sum(barbedgeBetweenness.values()) / len(barbedgeBetweenness)
barbtotalEdge = sum(barbedgeBetweenness.values())

# print sets of nodes, one for each community
print("Communities: ", barbMod)
# Modularity score
print("Modularity: ", barbModScore)
# Conductance scores
print("Conductance for: ", S, condScoreS)
print("Conductance for: ", T, condScoreT)

def naive_greedy_modularity_communities(G, resolution=1):
    r"""Find communities in G using greedy modularity maximization.

    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # First create one community for each node
    communities = list([frozenset([u]) for u in G.nodes()])
    # Track merges
    merges = []
    # Greedily merge communities until no improvement is possible
    old_modularity = None
    new_modularity = modularity(G, communities, resolution=resolution)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison
        old_modularity = new_modularity
        # Find best pair to merge
        trial_communities = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i==j and empty communities
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Merge communities u and v
                trial_communities[j] = u | v
                trial_communities[i] = frozenset([])
                trial_modularity = modularity(G, trial_communities,
                                              resolution=resolution)
                if trial_modularity >= new_modularity:
                    # Check if strictly better or tie
                    if trial_modularity > new_modularity:
                        # Found new best, save modularity and group indexes
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
                        # Break ties by choosing pair with lowest min id
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # If the best merge improves modularity, use it
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Remove empty communities and sort
    return sorted((c for c in communities if len(c) > 0), key=len, reverse=True)

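# A quick cross-check (an assumption, not part of the original module): on a
# small graph, the O(n^4) reference implementation should produce a partition
# of quality comparable to the heap-based CNM version above.
import networkx as nx

naive_demo_G = nx.karate_club_graph()
naive_demo_comms = naive_greedy_modularity_communities(naive_demo_G, resolution=1)
print([len(c) for c in naive_demo_comms])
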
def greedy_modularity_communities(G, weight=None):
    """Find communities in graph using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class and does not
    consider edge weights.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    Yields sets of nodes, one for each community.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    """
    # Count nodes and edges
    N = len(G.nodes())
    m = sum([d.get('weight', 1) for u, v, d in G.edges(data=True)])
    q0 = 1.0 / (2.0 * m)

    # Map node labels to contiguous integers
    label_for_node = dict((i, v) for i, v in enumerate(G.nodes()))
    node_for_label = dict((label_for_node[i], i) for i in range(N))

    # Calculate degrees
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists
    communities = dict((i, frozenset([i])) for i in range(N))
    merges = []

    # Initial modularity
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edges within community i
    # dq_dict[i][j]: dQ for merging community i, j
    # dq_heap[i][n] : (-dq, i, j) for community i nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    a = [k[i] * q0 for i in range(N)]
    dq_dict = dict(
        (i, dict(
            (j, 2 * q0 - 2 * k[i] * k[j] * q0 * q0)
            for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i))
        for i in range(N))
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()])
        for i in range(N)]
    H = MappedQueue([
        dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq

        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - set([i, j])
        both_set = i_set & j_set

        # Merge i into j and update dQ
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)
        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    communities = [
        frozenset([label_for_node[i] for i in c])
        for c in communities.values()]
    return sorted(communities, key=len, reverse=True)