from math import isclose
from random import randint

import igraph as ig
import louvain
import numpy as np


def test_diff_move():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv",
                                    directed=False)
    n = intraslice.vcount()
    layer_vec = [0] * n
    membership = list(range(n))

    part_rbc = louvain.RBConfigurationVertexPartition(
        intraslice, resolution_parameter=1.0, initial_membership=membership)
    part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice,
        resolution_parameter=1.0,
        layer_vec=layer_vec,
        initial_membership=membership)

    # check that diff_move() agrees with the actual quality() change over 100 random moves
    for repeat in range(100):
        v = randint(0, n - 1)
        c = randint(0, n - 1)
        old_quality = part_weighted_layers.quality()
        wl_diff = part_weighted_layers.diff_move(v, c)
        part_weighted_layers.move_node(v, c)
        true_diff = part_weighted_layers.quality() - old_quality

        rbc_diff = part_rbc.diff_move(v, c)
        part_rbc.move_node(v, c)

        assert isclose(
            wl_diff, true_diff
        ), "WeightedLayers diff_move() inconsistent with quality()"
        assert isclose(
            wl_diff, rbc_diff
        ), "WeightedLayers diff_move() inconsistent with single-layer"
        assert isclose(
            part_weighted_layers.quality(), part_rbc.quality()
        ), "WeightedLayers quality() inconsistent with single-layer"

    # check rng consistency between RBConfigurationVertexPartition and its WeightedLayers variant
    # with various seeds and intraslice resolution parameters
    for gamma in np.linspace(0.5, 1.5, 10):
        shared_seed = randint(-1 << 31, (1 << 31) - 1)  # random int32

        louvain.set_rng_seed(shared_seed)
        part_weighted_layers = louvain.RBConfigurationVertexPartitionWeightedLayers(
            intraslice, resolution_parameter=gamma, layer_vec=layer_vec)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_weighted_layers)

        louvain.set_rng_seed(shared_seed)
        part_rbc = louvain.RBConfigurationVertexPartition(
            intraslice, resolution_parameter=gamma)
        opt = louvain.Optimiser()
        opt.optimise_partition(partition=part_rbc)

        quality_weighted_layers = part_weighted_layers.quality(
            resolution_parameter=gamma)
        quality_rbc = part_rbc.quality(resolution_parameter=gamma)
        assert isclose(
            quality_weighted_layers, quality_rbc
        ), "Intra-layer optimisation inconsistent with single-layer"
Example #2
def find_example_with_4_stable_partitions():
    while True:
        G, gt3, gt9 = generate_hierarchical_SBM()

        all_parts = run_louvain(G)
        gamma_estimates = run_CHAMP(G, all_parts)
        stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

        num_stable_partitions_below_nine = len([p for p in stable_parts if num_communities(p) <= 9])

        if num_stable_partitions_below_nine > 3:
            plot_CHAMP_gamma_estimates(gamma_estimates)
            plt.savefig("hierarchical_sbm_gamma_estimates.pdf")
            plt.close()

            layout = G.layout_fruchterman_reingold(niter=10 ** 3)
            for p in stable_parts:
                ig.plot(louvain.RBConfigurationVertexPartition(G, p), bbox=(1000, 1000), layout=layout,
                        target=f"hierarchical_sbm_{num_communities(p)}-community.png")
            return
        else:
            print(f"Trial completed with {num_stable_partitions_below_nine} partitions with K <= 9. Continuing...")
Example #3
def test_multilayer_louvain():
    intraslice = ig.Graph.Read_Ncol("multilayer_SBM_intraslice_edges.csv",
                                    directed=False)
    interslice = ig.Graph.Read_Ncol("multilayer_SBM_interslice_edges.csv",
                                    directed=False)
    n_layers = 4
    n = intraslice.vcount() // n_layers
    layer_vec = np.array([i // n for i in range(n * n_layers)])

    intraslice.es['weight'] = 1.0
    intralayer_part = louvain.RBConfigurationVertexPartitionWeightedLayers(
        intraslice,
        resolution_parameter=1.0,
        layer_vec=layer_vec,
        weights='weight')

    for omega in np.linspace(0.5, 1.5, 10):
        interslice.es['weight'] = omega

        interlayer_part = louvain.RBConfigurationVertexPartition(
            interslice, resolution_parameter=0.0, weights='weight')

        opt = louvain.Optimiser()
        opt.optimise_partition_multiplex(
            partitions=[intralayer_part, interlayer_part])

        louvain_mod = intralayer_part.quality(
            resolution_parameter=1.0) + interlayer_part.quality()

        A = np.array(intraslice.get_adjacency().data)
        C = omega * np.array(interslice.get_adjacency().data)
        P = np.zeros((n_layers * n, n_layers * n))
        for i in range(n_layers):
            c_degrees = np.array(
                intraslice.degree(list(range(n * i, n * i + n))))
            c_inds = np.where(layer_vec == i)[0]
            P[np.ix_(c_inds, c_inds)] = np.outer(
                c_degrees, c_degrees) / np.sum(c_degrees)

        membership = np.array(intralayer_part.membership)
        true_mod = sum(
            calculate_coefficient(membership, X) for X in (A, -P, C))

        assert isclose(
            louvain_mod, true_mod
        ), "WeightedLayers quality() inconsistent with alternate calculation"
Example #4
def plot_stable_partitions(all_parts):
    G = ig.Graph.Famous("Zachary")

    # Store shared force-directed layout to make later plotting layouts consistent
    layout = G.layout_fruchterman_reingold(niter=1000)

    # Plot stable partitions when the number of communities is restricted to 2-4
    for K in range(2, 5):
        restricted_parts = {p for p in all_parts if num_communities(p) == K}

        if len(restricted_parts) > 0:
            ranges = CHAMP_2D(G, restricted_parts, GAMMA_START, GAMMA_END)
            gamma_estimates = ranges_to_gamma_estimates(G, ranges)
            stable_parts = gamma_estimates_to_stable_partitions(gamma_estimates)

            for i, p in enumerate(stable_parts):
                ig.plot(louvain.RBConfigurationVertexPartition(G, initial_membership=p),
                        f"karate_club_{K}_stable{i}.png", bbox=(1000, 1000), layout=layout)
Example #5
def run_approximated_louvain(data, rep_key, n_jobs = 1, resolution = 1.3, random_state = 0, n_clusters = 30, n_init = 20, class_label = 'approx_louvain_labels'):
	start = time.time()

	X = data.obsm[rep_key].astype('float64')
	np.random.seed(random_state)
	seeds = np.random.randint(np.iinfo(np.int32).max, size = n_init)
	
	old_n = set_numpy_thread(1)

	threads = [None] * n_jobs
	results = [None] * n_jobs

	for i in range(n_jobs):
		t = threading.Thread(target=run_one_instance_of_kmeans, args=(i, results, n_init, n_clusters, n_jobs, X, seeds))
		threads[i] = t
		t.start()

	for i in range(n_jobs):
		threads[i].join()

	set_numpy_thread(old_n)
	
	labels = list(zip(*[x for y in results for x in y]))
	uniqs = np.unique(labels, axis = 0)
	transfer_dict = {tuple(k):v for k, v in zip(uniqs, range(uniqs.shape[0]))}
	labels = [transfer_dict[x] for x in labels]

	G = construct_graph(data.uns['W_norm'])

	partition = louvain.RBConfigurationVertexPartition(G, resolution_parameter = resolution, initial_membership = labels)
	partition_agg = partition.aggregate_partition()
	optimiser = louvain.Optimiser()
	optimiser.set_rng_seed(random_state)
	diff = optimiser.optimise_partition(partition_agg)
	partition.from_coarse_partition(partition_agg)

	labels = np.array([str(x + 1) for x in partition.membership])
	categories = natsorted(np.unique(labels))
	data.obs[class_label] = pd.Categorical(values = labels, categories = categories)

	end = time.time()
	print("Approximated Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
Example #6
def run_louvain(data, affinity = 'W_norm', resolution = 1.3, random_state = 0):
	start = time.time()

	W = None
	if affinity == 'W_norm':
		W = data.uns['W_norm']
	elif affinity == 'W_diffmap':
		W = calculate_affinity_matrix(data.uns['diffmap_knn_indices'], data.uns['diffmap_knn_distances'])
	else:
		W_diffmap = calculate_affinity_matrix(data.uns['diffmap_knn_indices'], data.uns['diffmap_knn_distances'])
		W, diag_tmp, diag_half_tmp = calculate_normalized_affinity(W_diffmap)

	G = construct_graph(W)

	partition = louvain.RBConfigurationVertexPartition(G, resolution_parameter = resolution)
	optimiser = louvain.Optimiser()
	optimiser.set_rng_seed(random_state)
	diff = optimiser.optimise_partition(partition)

	labels = np.array([str(x + 1) for x in partition.membership])
	categories = natsorted(np.unique(labels))
	data.obs[aff2lab[affinity]] = pd.Categorical(values = labels, categories = categories)
	end = time.time()
	print("Louvain clustering is done. Time spent = {:.2f}s.".format(end - start))
Example #7
    p2 = 0.2
    p3 = 0.05
    p4 = 0.005
    B = 7
    N = B * 8

    if not os.path.exists("community_scales_graph.p"):
        pref_matrix = [[p1, p2, p3, p3, p4, p4, p4, p4],
                       [p2, p1, p3, p3, p4, p4, p4, p4],
                       [p3, p3, p1, p2, p4, p4, p4, p4],
                       [p3, p3, p2, p1, p4, p4, p4, p4],
                       [p4, p4, p4, p4, p1, p2, p3, p3],
                       [p4, p4, p4, p4, p2, p1, p3, p3],
                       [p4, p4, p4, p4, p3, p3, p1, p2],
                       [p4, p4, p4, p4, p3, p3, p2, p1]]
        block_sizes = [B] * 8
        G = ig.Graph.SBM(N, pref_matrix, block_sizes)
        with open("community_scales_graph.p", "wb") as f:
            pickle.dump(G, f)

    with open("community_scales_graph.p", "rb") as f:
        G = pickle.load(f)
    layout = G.layout_fruchterman_reingold(niter=10000)

    membership0 = [0] * N
    membership1 = [0] * (4 * B) + [1] * (4 * B)
    membership2 = [0] * (2 * B) + [1] * (2 * B) + [2] * (2 * B) + [3] * (2 * B)
    membership3 = [i // B for i in range(N)]

    for i, m in enumerate([membership0, membership1, membership2, membership3]):
        out = ig.plot(louvain.RBConfigurationVertexPartition(G, initial_membership=m), f"community_scales{i}.png",
                      layout=layout, bbox=(600, 600))
Example #8
def run_louvain_multilayer(intralayer_graph,
                           interlayer_graph,
                           layer_vec,
                           weight='weight',
                           resolution=1.0,
                           omega=1.0,
                           nruns=1):
    logging.debug('Shuffling node ids')
    t = time()
    mu = np.sum(intralayer_graph.es[weight]) + interlayer_graph.ecount()

    use_RBCweighted = hasattr(louvain,
                              'RBConfigurationVertexPartitionWeightedLayers')

    outparts = []
    for run in range(nruns):
        rand_perm = list(np.random.permutation(interlayer_graph.vcount()))
        # rand_perm = list(range(interlayer_graph.vcount()))
        rperm = rev_perm(rand_perm)
        interslice_layer_rand = interlayer_graph.permute_vertices(rand_perm)
        rlayer_vec = permute_vector(rand_perm, layer_vec)

        rintralayer_graph = intralayer_graph.permute_vertices(rand_perm)
        if use_RBCweighted:
            # a single (permuted) layer representing all intralayer connections
            rlayers = [rintralayer_graph]
        else:
            rlayers = _create_multilayer_igraphs_from_super_adj_igraph(
                rintralayer_graph, layer_vec=rlayer_vec)

        logging.debug('time: {:.4f}'.format(time() - t))

        t = time()

        #create the partition objects
        layer_partition_objs = []

        logging.debug('creating partition objects')
        t = time()

        # these are the shuffled igraph slice objects
        for i, layer in enumerate(rlayers):
            # resolution may be a scalar or a per-layer sequence
            try:
                res = resolution[i]
            except (TypeError, IndexError):
                res = resolution

            if use_RBCweighted:
                cpart = louvain.RBConfigurationVertexPartitionWeightedLayers(
                    layer,
                    layer_vec=rlayer_vec,
                    weights=weight,
                    resolution_parameter=res)
            else:
                # This creates an individual VertexPartition for each layer;
                # much slower to optimise.
                cpart = louvain.RBConfigurationVertexPartition(
                    layer, weights=weight, resolution_parameter=res)

            layer_partition_objs.append(cpart)

        coupling_partition = louvain.RBConfigurationVertexPartition(
            interslice_layer_rand, weights=weight, resolution_parameter=0)

        all_layer_partobjs = layer_partition_objs + [coupling_partition]

        optimiser = louvain.Optimiser()
        logging.debug('time: {:.4f}'.format(time() - t))
        logging.debug('running optimiser')
        t = time()

        layer_weights = [1] * len(rlayers) + [omega]
        improvement = optimiser.optimise_partition_multiplex(
            all_layer_partobjs, layer_weights=layer_weights)

        # the memberships of all partitions are tied together, so take the first
        finalpartition = permute_vector(rperm,
                                        all_layer_partobjs[0].membership)
        reversed_partobj = []
        # Undo the vertex permutation on the graph behind each partition object so
        # that expected edges can be properly calculated from the partobj.
        # This is not ideal; could we just reverse the permutation instead?
        for layer in layer_partition_objs:
            if use_RBCweighted:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartitionWeightedLayers(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        layer_vec=layer_vec,
                        resolution_parameter=layer.resolution_parameter))
            else:
                reversed_partobj.append(
                    louvain.RBConfigurationVertexPartition(
                        graph=layer.graph.permute_vertices(rperm),
                        initial_membership=finalpartition,
                        weights=weight,
                        resolution_parameter=layer.resolution_parameter))
        coupling_partition_rev = louvain.RBConfigurationVertexPartition(
            graph=coupling_partition.graph.permute_vertices(rperm),
            initial_membership=finalpartition,
            weights=weight,
            resolution_parameter=0)
        # use only the intralayer partition objects
        A = _get_sum_internal_edges_from_partobj_list(reversed_partobj,
                                                      weight=weight)
        if use_RBCweighted:  # there should be only one partobj here, representing all layers
            P = get_expected_edges_ml(reversed_partobj[0],
                                      layer_vec=layer_vec,
                                      weight=weight)
        else:
            P = _get_sum_expected_edges_from_partobj_list(reversed_partobj,
                                                          weight=weight)
        C = get_sum_internal_edges(coupling_partition_rev, weight=weight)
        outparts.append({
            'partition': np.array(finalpartition),
            'resolution': resolution,
            'coupling': omega,
            'orig_mod': (0.5 / mu) * (_get_modularity_from_partobj_list(reversed_partobj)
                                      + omega * coupling_partition_rev.quality()),
            'int_edges': A,
            'exp_edges': P,
            'int_inter_edges': C,
        })

    logging.debug('time: {:.4f}'.format(time() - t))
    return outparts
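
# A sketch of driving run_louvain_multilayer() above, assuming igraph slice
# graphs built elsewhere: the intralayer graph holds within-slice edges, the
# interlayer graph couples copies of a node across slices, and layer_vec maps
# each vertex to its slice index.
def _sketch_multilayer_usage(intralayer_graph, interlayer_graph, layer_vec):
    intralayer_graph.es['weight'] = 1.0
    interlayer_graph.es['weight'] = 1.0
    outparts = run_louvain_multilayer(intralayer_graph, interlayer_graph,
                                      layer_vec, resolution=1.0, omega=0.5,
                                      nruns=5)
    # keep the run with the highest multilayer modularity
    best = max(outparts, key=lambda part: part['orig_mod'])
    return best['partition']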
Example #9
    N = 600
    B = N // 3
    p_in1 = 10 / 99
    p_in2 = p_in1 * 0.75  # 5/66
    p_out1 = 0.25 / 40  # 1/160

    for i, p_out2 in enumerate([0.02, 0.035, 0.05]):  # delta
        pref_matrix = [[p_in1, p_out1, p_out1], [p_out1, p_in2, p_out2],
                       [p_out1, p_out2, p_in2]]
        block_sizes = [B] * 3
        G = ig.Graph.SBM(N, pref_matrix, block_sizes)
        assert G.is_connected()

        ground_truth = tuple(i // B for i in range(N))
        true_gamma = gamma_estimate(G, ground_truth)
        ground_truth2 = tuple(min(1, i // B) for i in range(N))
        true_gamma2 = gamma_estimate(G, ground_truth2)

        # Store shared force-directed layout to make later plotting layouts consistent
        layout = G.layout_fruchterman_reingold(niter=1000)

        out2 = ig.plot(louvain.RBConfigurationVertexPartition(G, ground_truth2),
                       f"bistable_sbm_delta{i}_2-community.png",
                       bbox=(1000, 1000),
                       layout=layout)
        out3 = ig.plot(louvain.RBConfigurationVertexPartition(G, ground_truth),
                       f"bistable_sbm_delta{i}_3-community.png",
                       bbox=(1000, 1000),
                       layout=layout)
Example #10
def plot_SBM_example():
    G, gt3, gt9 = generate_hierarchical_SBM()
    layout = G.layout_fruchterman_reingold(niter=10 ** 3)
    for p in [gt3, gt9]:
        ig.plot(louvain.RBConfigurationVertexPartition(G, p), bbox=(1000, 1000), layout=layout,
                target=f"hierarchical_sbm_example_{num_communities(p)}.png")
Example #11
def louvain_part(G):
    return louvain.RBConfigurationVertexPartition(G)
Example #12
from modularitypruning.plotting import plot_adjacency
import matplotlib.pyplot as plt

if __name__ == "__main__":
    p1 = 0.4
    p2 = 0.2
    p3 = 0.3
    p4 = 0.005
    p5 = 0.05

    N = 120
    B = N // 3

    pref_matrix = [[p1, p4, p4],
                   [p4, p2, p5],
                   [p4, p5, p3]]
    block_sizes = [B] * 3
    G = ig.Graph.SBM(N, pref_matrix, block_sizes)

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plot_adjacency(G.get_adjacency().data)
    plt.title("Example SBM Adjacency Matrix", fontsize=14)
    plt.tight_layout()
    plt.savefig("example_SBM_adjacency.png", dpi=200)

    out = ig.plot(louvain.RBConfigurationVertexPartition(G, initial_membership=[i // B for i in range(N)]),
                  "example_SBM_layout.png",
                  bbox=(750, 750),
                  layout=G.layout_fruchterman_reingold(niter=10000))
Example #13
                                                    interslice_weight=interslice_weight,
                                                    slice_attr='t',
                                                    vertex_id_attr='ccode')


#%% Do community detection
print('\nDoing community detection...')
n_repl = 100
resolutions = [0.6, 1.1, 1.7]
for resolution in resolutions:
    memberships = []
    print('Detecting communities using resolution parameter {0}'.format(resolution))
    for itr in range(n_repl):
      print('\tRun {0:02d}'.format(itr))

      partition_intraslice = [louvain.RBConfigurationVertexPartition(H, weights='weight',
                                                resolution_parameter=resolution)
                              for H in G_intraslice]
      partition_interslice = louvain.CPMVertexPartition(G_interslice,
                                                        weights='weight',
                                                        node_sizes=G_interslice.vs['node_size'], 
                                                        resolution_parameter=0)
      
      ##%% Optimise partitions
      opt = louvain.Optimiser()
      opt.consider_comms = louvain.ALL_NEIGH_COMMS
      opt.optimise_partition_multiplex(partition_intraslice + [partition_interslice])

      # The membership in all partitions will be identical, so simply
      # consider the membership for the interslice partition and graph.
      memberships.append(partition_interslice.membership)
    
Example #14
    def _cluster(self,
                 aData,
                 resolution,
                 clusterMin=10,
                 clusteringAlgorithm='leiden'
                 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Performs the clustering. This function is a little more complicated
        than strictly necessary because it preserves the information about
        the cluster label of each cell during the iterations of the modularity
        optimization. The final result where global modularity has been
        optimized is saved in the task's output subdir, whereas the iteration
        results are saved in output/iterations. It is sometimes useful to explore
        the cluster labels of cells from modularities prior to steady state, as
        they generally reflect coherent groupings that are more granular than
        the final assignments.

        Args:
            aData: anndata object to use for clustering
            resolution: resolution for modularity calculation
            clusterMin: minimum number of cells that must be in a cluster
                        to keep that cluster
            clusteringAlgorithm: choice of algorithm to use for modularity
                                 optimization, currently leiden and louvain are
                                 supported
        Returns:
            a tuple of dataframes: the first contains the cluster labels from
            all rounds of modularity optimization, the second holds just the
            final round of optimization. The index is always the cell id.
        """
        g = Neighbors(aData).to_igraph()

        if clusteringAlgorithm == 'louvain':
            import louvain as clAlgo
            print('using louvain algorithm')
        elif clusteringAlgorithm == 'leiden':
            import leidenalg as clAlgo
            print('using leiden algorithm')
        else:
            raise ValueError(
                'unsupported clusteringAlgorithm: {!r}'.format(clusteringAlgorithm))

        optimiser = clAlgo.Optimiser()
        tracking = []
        partition = clAlgo.RBConfigurationVertexPartition(
            g, weights='weight', resolution_parameter=resolution)
        partition_agg = partition.aggregate_partition()
        print(partition.summary())

        diff = optimiser.move_nodes(partition_agg)
        while diff > 0.0:
            partition.from_coarse_partition(partition_agg)
            partition_agg = partition_agg.aggregate_partition()
            tracking.append(partition.membership)
            print(partition_agg.summary())
            diff = optimiser.move_nodes(partition_agg)

        df = pd.DataFrame(tracking, columns=aData.obs.index).T

        clusteringOutput = df.iloc[:, [-1]].copy(deep=True)
        colLabel = 'kValue_{}_resolution_{}'.format(self.kValue,
                                                    int(self.resolution))
        clusteringOutput.columns = [colLabel]
        clusteringOutputGrouped = clusteringOutput.groupby(colLabel).size()

        toZero = clusteringOutputGrouped[
            clusteringOutputGrouped < int(clusterMin)].index.values.tolist()
        mask = clusteringOutput[colLabel].isin(toZero)
        clusteringOutput[colLabel] = clusteringOutput[colLabel].where(~mask,
                                                                      other=-1)
        kept_labels = [c for c in clusteringOutput[colLabel].unique() if c != -1]
        print('Clustering yields {} clusters with at least {} cells'.format(
            len(kept_labels), clusterMin))

        return df, clusteringOutput
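
    # Hypothetical call site for _cluster() above (self is the surrounding task
    # object, aData an AnnData): the first frame tracks labels per aggregation
    # round, the second keeps only the final assignments.
    def _sketch_cluster_usage(self, aData):
        all_rounds, final_round = self._cluster(aData, resolution=1.0,
                                                clusterMin=10,
                                                clusteringAlgorithm='leiden')
        return final_round.iloc[:, 0].value_counts()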