from math import isclose
from random import randint

import igraph as ig
import louvain
import numpy as np


def test_diff_move():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv", directed=False)
    n = intraslice.vcount()
    layer_vec = [0] * n
    membership = list(range(n))

    part_rbc = louvain.RBConfigurationVertexPartition(
        intraslice, resolution_parameter=1.0, initial_membership=membership)
    part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice, resolution_parameter=1.0, layer_vec=layer_vec,
        initial_membership=membership)

    # check diff_move() / quality() consistency across 100 random moves
    for repeat in range(100):
        v = randint(0, n - 1)
        c = randint(0, n - 1)

        old_quality = part_weighted_layers.quality()
        wl_diff = part_weighted_layers.diff_move(v, c)
        part_weighted_layers.move_node(v, c)
        true_diff = part_weighted_layers.quality() - old_quality

        rbc_diff = part_rbc.diff_move(v, c)
        part_rbc.move_node(v, c)

        assert isclose(wl_diff, true_diff), \
            "WeightedLayers diff_move() inconsistent with quality()"
        assert isclose(wl_diff, rbc_diff), \
            "WeightedLayers diff_move() inconsistent with single-layer"
        assert isclose(part_weighted_layers.quality(), part_rbc.quality()), \
            "WeightedLayers quality() inconsistent with single-layer"

    # check rng consistency between RBConfigurationVertexPartition and its
    # WeightedLayers variant with various seeds and intraslice resolution parameters
    for gamma in np.linspace(0.5, 1.5, 10):
        shared_seed = randint(-1 << 31, (1 << 31) - 1)  # random int32

        louvain.set_rng_seed(shared_seed)
        part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
            intraslice, resolution_parameter=gamma, layer_vec=layer_vec)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_weighted_layers)

        louvain.set_rng_seed(shared_seed)
        part_rbc = louvain.RBConfigurationVertexPartition(
            intraslice, resolution_parameter=gamma)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_rbc)

        quality_weighted_layers = part_weighted_layers.quality(resolution_parameter=gamma)
        quality_rbc = part_rbc.quality(resolution_parameter=gamma)
        assert isclose(quality_weighted_layers, quality_rbc), \
            "Intra-layer optimisation inconsistent with single-layer"
def find_example_with_4_stable_partitions():
    while True:
        G, gt3, gt9 = generate_hierarchical_SBM()
        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)
        num_stable_partitions_below_nine = len(
            [p for p in stable_parts if num_communities(p) <= 9])

        if num_stable_partitions_below_nine > 3:
            plot_CHAMP_gamma_estimates(gamma_estimates)
            plt.savefig("hierarchical_sbm_gamma_estimates.pdf")
            plt.close()

            layout = G.layout_fruchterman_reingold(niter=10 ** 3)
            for p in stable_parts:
                ig.plot(louvain.RBConfigurationVertexPartition(G, p),
                        bbox=(1000, 1000), layout=layout,
                        target=f"hierarchical_sbm_{num_communities(p)}-community.png")
            return
        else:
            print(f"Trial completed with {num_stable_partitions_below_nine} partitions "
                  f"with K <= 9. Continuing...")
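# The helpers generate_hierarchical_SBM, run_louvain, run_CHAMP,
# gamma_estimates_to_stable_partitions and plot_CHAMP_gamma_estimates come from
# the surrounding project. For reference, num_communities is presumably just
# the number of distinct labels in a membership vector; a minimal sketch:
def num_communities(membership):
    return len(set(membership))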
def test_multilayer_louvain():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv", directed=False)
    interslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv", directed=False)
    n_layers = 4
    n = intraslice.vcount() // n_layers
    layer_vec = np.array([i // n for i in range(n * n_layers)])
    intraslice.es['weight'] = 1.0
    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice, resolution_parameter=1.0, layer_vec=layer_vec, weights='weight')

    for omega in np.linspace(0.5, 1.5, 10):
        interslice.es['weight'] = omega
        interlayer_part = louvain.RBConfigurationVertexPartition(
            interslice, resolution_parameter=0.0, weights='weight')

        opt = louvain.Optimiser()
        opt.optimise_partition_multiplex(partitions=[intralayer_part, interlayer_part])
        louvain_mod = (intralayer_part.quality(resolution_parameter=1.0)
                       + interlayer_part.quality())

        # adjacency, per-layer null-model, and coupling matrices
        # for the alternate modularity calculation
        A = np.array(intraslice.get_adjacency().data)
        C = omega * np.array(interslice.get_adjacency().data)
        P = np.zeros((n_layers * n, n_layers * n))
        for i in range(n_layers):
            c_degrees = np.array(intraslice.degree(list(range(n * i, n * i + n))))
            c_inds = np.where(layer_vec == i)[0]
            P[np.ix_(c_inds, c_inds)] = np.outer(c_degrees, c_degrees) / np.sum(c_degrees)

        membership = np.array(intralayer_part.membership)
        true_mod = sum(calculate_coefficient(membership, X) for X in (A, -P, C))
        assert isclose(louvain_mod, true_mod), \
            "WeightedLayers quality() inconsistent with alternate calculation"
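# A minimal sketch of the calculate_coefficient helper the test assumes (an
# inference from its usage, not the original definition): sum the entries of X
# over ordered node pairs that share a community, matching louvain's
# unnormalised quality() convention for undirected graphs.
import numpy as np

def calculate_coefficient(membership, X):
    same_community = membership[:, None] == membership[None, :]  # delta(g_i, g_j)
    return np.sum(X[same_community])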
def plot_stable_partitions(all_parts):
    G = ig.Graph.Famous("Zachary")

    # Store shared force-directed layout to make later plotting layouts consistent
    layout = G.layout_fruchterman_reingold(niter=1000)

    # Plot stable partitions when the number of communities is restricted to 2-4
    for K in range(2, 5):
        restricted_parts = {p for p in all_parts if num_communities(p) == K}

        if len(restricted_parts) > 0:
            ranges = CHAMP_2D(G, restricted_parts, GAMMA_START, GAMMA_END)
            gamma_estimates = ranges_to_gamma_estimates(G, ranges)
            stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

            for i, p in enumerate(stable_parts):
                ig.plot(louvain.RBConfigurationVertexPartition(G, initial_membership=p),
                        f"karate_club_{K}_stable{i}.png", bbox=(1000, 1000), layout=layout)
def run_approximated_louvain(data, rep_key, n_jobs=1, resolution=1.3, random_state=0,
                             n_clusters=30, n_init=20, class_label='approx_louvain_labels'):
    start = time.time()

    X = data.obsm[rep_key].astype('float64')
    np.random.seed(random_state)
    seeds = np.random.randint(np.iinfo(np.int32).max, size=n_init)

    # run n_init KMeans instances across n_jobs threads, one NumPy thread each
    old_n = set_numpy_thread(1)
    threads = [None] * n_jobs
    results = [None] * n_jobs
    for i in range(n_jobs):
        t = threading.Thread(target=run_one_instance_of_kmeans,
                             args=(i, results, n_init, n_clusters, n_jobs, X, seeds))
        threads[i] = t
        t.start()
    for i in range(n_jobs):
        threads[i].join()
    set_numpy_thread(old_n)

    # collapse the n_init cluster labels per cell into a single consensus label
    labels = list(zip(*[x for y in results for x in y]))
    uniqs = np.unique(labels, axis=0)
    transfer_dict = {tuple(k): v for k, v in zip(uniqs, range(uniqs.shape[0]))}
    labels = [transfer_dict[x] for x in labels]

    # use the consensus labels as the initial membership for Louvain
    G = construct_graph(data.uns['W_norm'])
    partition = louvain.RBConfigurationVertexPartition(
        G, resolution_parameter=resolution, initial_membership=labels)
    partition_agg = partition.aggregate_partition()

    optimiser = louvain.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition_agg)
    partition.from_coarse_partition(partition_agg)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    data.obs[class_label] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    print("Approximated Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
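# A hedged sketch of the run_one_instance_of_kmeans worker assumed above (not
# the package's exact implementation): thread i fits every i-th of the n_init
# KMeans initialisations and stores the resulting label arrays in results[i],
# so the caller can zip them into per-cell label tuples.
from sklearn.cluster import KMeans

def run_one_instance_of_kmeans(thread_no, results, n_init, n_clusters, n_jobs, X, seeds):
    results[thread_no] = []
    for i in range(thread_no, n_init, n_jobs):
        km = KMeans(n_clusters=n_clusters, n_init=1, random_state=seeds[i])
        results[thread_no].append(km.fit_predict(X))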
def run_louvain(data, affinity='W_norm', resolution=1.3, random_state=0):
    start = time.time()

    W = None
    if affinity == 'W_norm':
        W = data.uns['W_norm']
    elif affinity == 'W_diffmap':
        W = calculate_affinity_matrix(data.uns['diffmap_knn_indices'],
                                      data.uns['diffmap_knn_distances'])
    else:
        W_diffmap = calculate_affinity_matrix(data.uns['diffmap_knn_indices'],
                                              data.uns['diffmap_knn_distances'])
        W, diag_tmp, diag_half_tmp = calculate_normalized_affinity(W_diffmap)

    G = construct_graph(W)
    partition = louvain.RBConfigurationVertexPartition(G, resolution_parameter=resolution)
    optimiser = louvain.Optimiser()
    optimiser.set_rng_seed(random_state)
    diff = optimiser.optimise_partition(partition)

    labels = np.array([str(x + 1) for x in partition.membership])
    categories = natsorted(np.unique(labels))
    # aff2lab (defined elsewhere) maps affinity names to obs column labels
    data.obs[aff2lab[affinity]] = pd.Categorical(values=labels, categories=categories)

    end = time.time()
    print("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
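# A minimal sketch of the construct_graph helper used above (an assumption, not
# the package's exact code): build a weighted undirected igraph from a
# symmetric sparse affinity matrix W.
import igraph as ig
from scipy.sparse import coo_matrix

def construct_graph(W):
    W = coo_matrix(W)
    upper = W.row < W.col  # keep each undirected edge once
    edges = list(zip(W.row[upper].tolist(), W.col[upper].tolist()))
    G = ig.Graph(n=W.shape[0], edges=edges, directed=False)
    G.es['weight'] = W.data[upper].tolist()
    return G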
# (p1 is defined earlier in the original script)
p2 = 0.2
p3 = 0.05
p4 = 0.005
B = 7
N = B * 8

if not os.path.exists("community_scales_graph.p"):
    pref_matrix = [[p1, p2, p3, p3, p4, p4, p4, p4],
                   [p2, p1, p3, p3, p4, p4, p4, p4],
                   [p3, p3, p1, p2, p4, p4, p4, p4],
                   [p3, p3, p2, p1, p4, p4, p4, p4],
                   [p4, p4, p4, p4, p1, p2, p3, p3],
                   [p4, p4, p4, p4, p2, p1, p3, p3],
                   [p4, p4, p4, p4, p3, p3, p1, p2],
                   [p4, p4, p4, p4, p3, p3, p2, p1]]
    block_sizes = [B] * 8
    G = ig.Graph.SBM(N, pref_matrix, block_sizes)
    pickle.dump(G, open("community_scales_graph.p", "wb"))

G = pickle.load(open("community_scales_graph.p", "rb"))
layout = G.layout_fruchterman_reingold(niter=10000)

# nested ground-truth memberships at four community scales
membership0 = [0] * N
membership1 = [0] * (4 * B) + [1] * (4 * B)
membership2 = [0] * (2 * B) + [1] * (2 * B) + [2] * (2 * B) + [3] * (2 * B)
membership3 = [i // B for i in range(N)]

for i, m in enumerate([membership0, membership1, membership2, membership3]):
    out = ig.plot(louvain.RBConfigurationVertexPartition(G, initial_membership=m),
                  f"community_scales{i}.png", layout=layout, bbox=(600, 600))
def run_louvain_multilayer(intralayer_graph, interlayer_graph, layer_vec,
                           weight='weight', resolution=1.0, omega=1.0, nruns=1):
    logging.debug('Shuffling node ids')
    t = time()

    mu = np.sum(intralayer_graph.es[weight]) + interlayer_graph.ecount()
    use_RBCweighted = hasattr(louvain, 'RBConfigurationVertexPartitionWeightedLayers')

    outparts = []
    for run in range(nruns):
        # shuffle node ids so the optimiser does not depend on input order
        rand_perm = list(np.random.permutation(interlayer_graph.vcount()))
        rperm = rev_perm(rand_perm)
        interslice_layer_rand = interlayer_graph.permute_vertices(rand_perm)
        rlayer_vec = permute_vector(rand_perm, layer_vec)
        rintralayer_graph = intralayer_graph.permute_vertices(rand_perm)

        if use_RBCweighted:
            # one layer representing all intralayer connections here
            rlayers = [rintralayer_graph]
        else:
            rlayers = _create_multilayer_igraphs_from_super_adj_igraph(
                rintralayer_graph, layer_vec=rlayer_vec)
        logging.debug('time: {:.4f}'.format(time() - t))

        # create the partition objects
        logging.debug('creating partition objects')
        t = time()
        layer_partition_objs = []
        for i, layer in enumerate(rlayers):  # these are the shuffled igraph slice objects
            try:
                res = resolution[i]
            except (TypeError, IndexError):
                res = resolution

            if use_RBCweighted:
                cpart = louvain.RBConfigurationVertexPartitionWeightedLayers(
                    layer, layer_vec=rlayer_vec, weights=weight, resolution_parameter=res)
            else:
                # This creates an individual VertexPartition for each layer.
                # Much slower to optimise.
                cpart = louvain.RBConfigurationVertexPartition(
                    layer, weights=weight, resolution_parameter=res)
            layer_partition_objs.append(cpart)

        coupling_partition = louvain.RBConfigurationVertexPartition(
            interslice_layer_rand, weights=weight, resolution_parameter=0)
        all_layer_partobjs = layer_partition_objs + [coupling_partition]

        optimiser = louvain.Optimiser()
        logging.debug('time: {:.4f}'.format(time() - t))
        logging.debug('running optimiser')
        t = time()

        layer_weights = [1] * len(rlayers) + [omega]
        improvement = optimiser.optimise_partition_multiplex(
            all_layer_partobjs, layer_weights=layer_weights)

        # the membership for each of the partitions is tied together
        finalpartition = permute_vector(rperm, all_layer_partobjs[0].membership)

        # go back and reverse the graphs associated with each of the partition
        # objects; this allows for properly calculating expected edges with the
        # partition object. This is not ideal: could we just reverse the permutation?
        reversed_partobj = []
        for layer in layer_partition_objs:
            if use_RBCweighted:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartitionWeightedLayers(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition, weights=weight,
                        layer_vec=layer_vec,
                        resolution_parameter=layer.resolution_parameter))
            else:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartition(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition, weights=weight,
                        resolution_parameter=layer.resolution_parameter))
        coupling_partition_rev = louvain.RBConfigurationVertexPartition(
            graph=coupling_partition.graph.permute_vertices(rperm),
            initial_membership=finalpartition, weights=weight,
            resolution_parameter=0)

        # use only the intralayer partition objects
        A = _get_sum_internal_edges_from_partobj_list(reversed_partobj, weight=weight)
        if use_RBCweighted:
            # there should be only one partition object here, representing all layers
            P = get_expected_edges_ml(reversed_partobj[0], layer_vec=layer_vec, weight=weight)
        else:
            P = _get_sum_expected_edges_from_partobj_list(reversed_partobj, weight=weight)
        C = get_sum_internal_edges(coupling_partition_rev, weight=weight)

        outparts.append({'partition': np.array(finalpartition),
                         'resolution': resolution,
                         'coupling': omega,
                         'orig_mod': (.5 / mu) * (_get_modularity_from_partobj_list(reversed_partobj)
                                                  + omega * coupling_partition_rev.quality()),
                         'int_edges': A,
                         'exp_edges': P,
                         'int_inter_edges': C})
        logging.debug('time: {:.4f}'.format(time() - t))

    return outparts
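# Minimal sketches of the permutation helpers the function above relies on,
# inferred from how they are used with igraph's permute_vertices (assumptions,
# not the original definitions). permute_vertices(perm) sends vertex i to
# position perm[i], so attribute vectors must be scattered the same way, and
# rev_perm builds the inverse permutation to undo the shuffle.
def rev_perm(perm):
    rev = [0] * len(perm)
    for i, p in enumerate(perm):
        rev[p] = i  # rev[perm[i]] = i
    return rev

def permute_vector(perm, vec):
    out = [None] * len(vec)
    for i, p in enumerate(perm):
        out[p] = vec[i]  # out[perm[i]] = vec[i]
    return out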
N = 600
B = N // 3
p_in1 = 10 / 99
p_in2 = p_in1 * 0.75  # 5/66
p_out1 = 0.25 / 40  # 1/160

for i, p_out2 in enumerate([0.02, 0.035, 0.05]):  # delta
    pref_matrix = [[p_in1, p_out1, p_out1],
                   [p_out1, p_in2, p_out2],
                   [p_out1, p_out2, p_in2]]
    block_sizes = [B] * 3
    G = ig.Graph.SBM(N, pref_matrix, block_sizes)
    assert G.is_connected()

    # 3-community ground truth and its 2-community merger
    ground_truth = tuple(i // B for i in range(N))
    true_gamma = gamma_estimate(G, ground_truth)
    ground_truth2 = tuple(min(1, i // B) for i in range(N))
    true_gamma2 = gamma_estimate(G, ground_truth2)

    # Store shared force-directed layout to make later plotting layouts consistent
    layout = G.layout_fruchterman_reingold(niter=1000)
    out2 = ig.plot(louvain.RBConfigurationVertexPartition(G, ground_truth2),
                   f"bistable_sbm_delta{i}_2-community.png",
                   bbox=(1000, 1000), layout=layout)
    out3 = ig.plot(louvain.RBConfigurationVertexPartition(G, ground_truth),
                   f"bistable_sbm_delta{i}_3-community.png",
                   bbox=(1000, 1000), layout=layout)
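# gamma_estimate comes from modularitypruning. A hedged sketch of the idea it
# implements for unweighted graphs (the degree-corrected planted-partition
# maximum-likelihood estimate of Pamfil et al. 2019): fit the internal and
# external edge propensities omega_in, omega_out from the partition, then
# gamma = (omega_in - omega_out) / (log(omega_in) - log(omega_out)).
import numpy as np

def gamma_estimate_sketch(G, membership):
    m = G.ecount()
    e_in = sum(1 for e in G.es if membership[e.source] == membership[e.target])
    degree_sums = {}  # sum of degrees per community
    for v, c in enumerate(membership):
        degree_sums[c] = degree_sums.get(c, 0) + G.degree(v)
    expected_in = sum(k * k for k in degree_sums.values()) / (4 * m)
    omega_in = e_in / expected_in
    omega_out = (m - e_in) / (m - expected_in)
    return (omega_in - omega_out) / (np.log(omega_in) - np.log(omega_out))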
def plot_SBM_example():
    G, gt3, gt9 = generate_hierarchical_SBM()
    layout = G.layout_fruchterman_reingold(niter=10 ** 3)
    for p in [gt3, gt9]:
        ig.plot(louvain.RBConfigurationVertexPartition(G, p),
                bbox=(1000, 1000), layout=layout,
                target=f"hierarchical_sbm_example_{num_communities(p)}.png")
def louvain_part(G):
    return louvain.RBConfigurationVertexPartition(G)
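# Illustrative usage of the helper above: optimise the returned partition in
# place with louvain's Optimiser (the Zachary karate club graph is just an
# example input).
import igraph as ig
import louvain

if __name__ == "__main__":
    G = ig.Graph.Famous("Zachary")
    partition = louvain_part(G)
    louvain.Optimiser().optimise_partition(partition)
    print(partition.summary())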
from modularitypruning.plotting import plot_adjacency
import igraph as ig
import louvain
import matplotlib.pyplot as plt

if __name__ == "__main__":
    p1 = 0.4
    p2 = 0.2
    p3 = 0.3
    p4 = 0.005
    p5 = 0.05
    N = 120
    B = N // 3
    pref_matrix = [[p1, p4, p4],
                   [p4, p2, p5],
                   [p4, p5, p3]]
    block_sizes = [B] * 3
    G = ig.Graph.SBM(N, pref_matrix, block_sizes)

    plt.tight_layout()
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plot_adjacency(G.get_adjacency().data)
    plt.title("Example SBM Adjacency Matrix", fontsize=14)
    plt.savefig("example_SBM_adjacency.png", dpi=200)

    out = ig.plot(louvain.RBConfigurationVertexPartition(
                      G, initial_membership=[i // B for i in range(N)]),
                  "example_SBM_layout.png", bbox=(750, 750),
                  layout=G.layout_fruchterman_reingold(niter=10000))
                                 interslice_weight=interslice_weight,
                                 slice_attr='t', vertex_id_attr='ccode')

#%% Do community detection
print('\nDoing community detection...')
n_repl = 100
resolutions = [0.6, 1.1, 1.7]
for resolution in resolutions:
    memberships = []
    print('Detecting communities using resolution parameter {0}'.format(resolution))
    for itr in range(n_repl):
        print('\tRun {0:02d}'.format(itr))
        partition_intraslice = [louvain.RBConfigurationVertexPartition(
                                    H, weights='weight',
                                    resolution_parameter=resolution)
                                for H in G_intraslice]
        partition_interslice = louvain.CPMVertexPartition(
            G_interslice, weights='weight',
            node_sizes=G_interslice.vs['node_size'],
            resolution_parameter=0)

        ##%% Optimise partitions
        opt = louvain.Optimiser()
        opt.consider_comms = louvain.ALL_NEIGH_COMMS
        opt.optimise_partition_multiplex(partition_intraslice + [partition_interslice])

        # The membership in all partitions will be identical, so simply
        # consider the membership for the interslice partition and graph.
        memberships.append(partition_interslice.membership)
def _cluster(self, aData, resolution, clusterMin=10,
             clusteringAlgorithm='leiden') -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Performs the clustering.

    This function is a little more complicated than strictly necessary
    because it preserves the information about the cluster label of each
    cell during the iterations of the modularity optimization. The final
    result, where global modularity has been optimized, is saved in the
    task's output subdir, whereas the iteration results are saved in
    output/iterations. It is sometimes useful to explore the cluster
    labels of cells from modularities prior to steady state, as they
    generally reflect coherent groupings that are more granular than the
    final assignments.

    Args:
        aData: anndata object to use for clustering
        resolution: resolution for modularity calculation
        clusterMin: minimum number of cells that must be in a cluster to
            keep that cluster
        clusteringAlgorithm: choice of algorithm to use for modularity
            optimization; currently leiden and louvain are supported
    Returns:
        a tuple of dataframes; the first contains the cluster labels from
        all rounds of modularity optimization, the second is just the
        final round of optimization. Index is always cell id
    """
    g = Neighbors(aData).to_igraph()

    if clusteringAlgorithm == 'louvain':
        import louvain as clAlgo
        print('using louvain algorithm')
    elif clusteringAlgorithm == 'leiden':
        import leidenalg as clAlgo
        print('using leiden algorithm')

    optimiser = clAlgo.Optimiser()
    tracking = []
    partition = clAlgo.RBConfigurationVertexPartition(
        g, weights='weight', resolution_parameter=resolution)
    partition_agg = partition.aggregate_partition()
    print(partition.summary())

    # iterate move_nodes/aggregate until no further modularity improvement,
    # recording the intermediate membership of every cell along the way
    diff = optimiser.move_nodes(partition_agg)
    while diff > 0.0:
        partition.from_coarse_partition(partition_agg)
        partition_agg = partition_agg.aggregate_partition()
        tracking.append(partition.membership)
        print(partition_agg.summary())
        diff = optimiser.move_nodes(partition_agg)

    df = pd.DataFrame(tracking, columns=aData.obs.index).T
    clusteringOutput = df.iloc[:, [-1]].copy(deep=True)
    colLabel = 'kValue_{}_resolution_{}'.format(self.kValue, int(self.resolution))
    clusteringOutput.columns = [colLabel]

    # relabel cells in clusters smaller than clusterMin as -1
    clusteringOutputGrouped = clusteringOutput.groupby(colLabel).size()
    toZero = clusteringOutputGrouped[
        clusteringOutputGrouped < int(clusterMin)].index.values.tolist()
    mask = clusteringOutput[colLabel].isin(toZero)
    clusteringOutput[colLabel] = clusteringOutput[colLabel].where(~mask, other=-1)
    print('Clustering yields {} clusters with at least {} cells'.format(
        clusteringOutput[colLabel].unique().astype(int).max(), clusterMin))

    return df, clusteringOutput