def test_SBM_epsilon(self):
    """Check latent_position_test on SBM pairs, serially and with workers=-1.

    A1 and A2 are sampled from the same block matrix and A3 from a different
    one. The first element of each result (presumably the p-value) must be
    above 0.05 for the matched pair and at most 0.05 for the mismatched pair.
    """
    np.random.seed(12345678)
    same_blocks = np.array([[0.5, 0.2], [0.2, 0.5]])
    other_blocks = np.array([[0.7, 0.2], [0.2, 0.7]])
    block_size = 200
    A1 = sbm(2 * [block_size], same_blocks)
    A2 = sbm(2 * [block_size], same_blocks)
    A3 = sbm(2 * [block_size], other_blocks)

    # The empty dict reproduces the plain (non parallel) call; the second
    # entry repeats the same checks with workers=-1 (parallel).
    for extra in ({}, {"workers": -1}):
        null_result = latent_position_test(
            A1, A2, n_components=2, n_bootstraps=100, **extra
        )
        alt_result = latent_position_test(
            A1, A3, n_components=2, n_bootstraps=100, **extra
        )
        self.assertTrue(null_result[0] > 0.05)
        self.assertTrue(alt_result[0] <= 0.05)
def test_SBM_dcorr(self):
    """Check latent_distribution_test on matched and mismatched SBM pairs.

    The first element of each result (presumably the p-value) must be above
    0.05 when both graphs share a block matrix and at most 0.05 otherwise.
    """
    np.random.seed(12345678)
    same_blocks = np.array([[0.5, 0.2], [0.2, 0.5]])
    other_blocks = np.array([[0.7, 0.2], [0.2, 0.7]])
    block_size = 200
    A1 = sbm(2 * [block_size], same_blocks)
    A2 = sbm(2 * [block_size], same_blocks)
    A3 = sbm(2 * [block_size], other_blocks)

    null_result = latent_distribution_test(A1, A2)
    alt_result = latent_distribution_test(A1, A3)

    self.assertTrue(null_result[0] > 0.05)
    self.assertTrue(alt_result[0] <= 0.05)
def generate_data():
    """Sample 40 two-block SBM graphs in two classes of 20.

    Returns
    -------
    (list, list)
        The sampled graphs (class "0" first, then class "1") and the matching
        list of string labels.
    """
    np.random.seed(1)
    sizes = [50, 50]
    per_class = 20
    # Insertion order matters: class "0" graphs are sampled before class "1".
    block_probs = {
        "0": [[0.2, 0.1], [0.1, 0.2]],
        "1": [[0.1, 0.2], [0.2, 0.1]],
    }
    graphs = []
    targets = []
    for label, probs in block_probs.items():
        for _ in range(per_class):
            graphs.append(sbm(sizes, probs))
            targets.append(label)
    return graphs, targets
def test_SBM_epsilon(self):
    """Check LatentPositionTest.fit_predict on matched and mismatched SBMs.

    The value returned for two graphs from the same block matrix must be
    above 0.05; for graphs from different block matrices it must be at
    most 0.05.
    """
    np.random.seed(12345678)
    same_blocks = np.array([[0.5, 0.2], [0.2, 0.5]])
    other_blocks = np.array([[0.7, 0.2], [0.2, 0.7]])
    block_size = 200
    A1 = sbm(2 * [block_size], same_blocks)
    A2 = sbm(2 * [block_size], same_blocks)
    A3 = sbm(2 * [block_size], other_blocks)

    # Both testers are built before either is fitted, as before.
    settings = {"n_components": 2, "n_bootstraps": 100}
    null_tester = LatentPositionTest(**settings)
    alt_tester = LatentPositionTest(**settings)
    p_null = null_tester.fit_predict(A1, A2)
    p_alt = alt_tester.fit_predict(A1, A3)

    self.assertTrue(p_null > 0.05)
    self.assertTrue(p_alt <= 0.05)
def gen_sbm(p=.3, q=.15, N=1500):
    """
    Sample a three-community SBM.

    Each community gets ``N // 3`` nodes. The 3x3 block matrix holds ``p`` on
    the diagonal and ``q`` everywhere else. The result of ``sbm`` called with
    ``return_labels=True`` is returned unchanged.
    """
    community_size = N // 3
    block_probs = np.full((3, 3), q)
    np.fill_diagonal(block_probs, p)
    return sbm([community_size] * 3, block_probs, return_labels=True)
def gen_sbm(p, q, assortative=True, N=1500):
    """
    Sample a three-community SBM, optionally with p and q swapped.

    With ``assortative`` truthy the block matrix has ``p`` on the diagonal
    and ``q`` elsewhere; otherwise the two values trade places. Each
    community gets ``N // 3`` nodes. The result of ``sbm`` called with
    ``return_labels=True`` is returned unchanged.
    """
    on_diag, off_diag = (p, q) if assortative else (q, p)
    community_size = N // 3
    block_probs = np.full((3, 3), off_diag)
    np.fill_diagonal(block_probs, on_diag)
    return sbm([community_size] * 3, block_probs, return_labels=True)
def setUp(self) -> None:
    """Create the estimator, a sampled directed graph, labels and the
    expected edge-probability matrix used by the tests."""
    block_sizes = [50, 50]
    B = np.array([[0.9, 0.1], [0.1, 0.9]])

    self.estimator = SBMEstimator(directed=True, loops=False)
    self.graph = sbm(block_sizes, B, directed=True)
    self.labels = _n_to_labels(block_sizes)

    expected = _block_to_full(B, self.labels, (100, 100))
    # The model has no self-loops, so the diagonal is zeroed out.
    np.fill_diagonal(expected, 0)
    self.p_mat = expected
def setup_class(cls):
    """Create the estimator, a sampled directed graph, labels and the
    expected edge-probability matrix, stored on the class."""
    block_sizes = [50, 50]
    B = np.array([[0.9, 0.1], [0.1, 0.9]])

    cls.estimator = SBMEstimator(directed=True, loops=False)
    cls.graph = sbm(block_sizes, B, directed=True)
    cls.labels = _n_to_labels(block_sizes)

    expected = _block_to_full(B, cls.labels, (100, 100))
    # The model has no self-loops, so the diagonal is zeroed out.
    np.fill_diagonal(expected, 0)
    cls.p_mat = expected
def test_SBM_dcorr(self):
    """Run the matched/mismatched SBM check for every entry of self.tests.

    Each key of ``self.tests`` and its value are passed as the first two
    positional arguments of ``LatentDistributionTest``. The RNG is re-seeded
    and the graphs re-sampled for every entry.
    """
    for test, metric in self.tests.items():
        np.random.seed(12345678)
        same_blocks = np.array([[0.5, 0.2], [0.2, 0.5]])
        other_blocks = np.array([[0.7, 0.2], [0.2, 0.7]])
        block_size = 200
        A1 = sbm(2 * [block_size], same_blocks)
        A2 = sbm(2 * [block_size], same_blocks)
        A3 = sbm(2 * [block_size], other_blocks)

        # Both testers are built before either is fitted, as before.
        settings = {"n_components": 2, "n_bootstraps": 100}
        null_tester = LatentDistributionTest(test, metric, **settings)
        alt_tester = LatentDistributionTest(test, metric, **settings)
        p_null = null_tester.fit_predict(A1, A2)
        p_alt = alt_tester.fit_predict(A1, A3)

        self.assertTrue(p_null > 0.05)
        self.assertTrue(p_alt <= 0.05)
def M(request):
    """Sample an undirected two-block SBM (10 nodes per block) with labels."""
    # module scope ensures that A and labels will always match
    # since they exist in separate functions

    # parameters
    block_size = 10
    within, between = 0.9, 0.3

    # block probability matrix
    block_probs = np.array([[within, between], [between, within]])

    # generate sbm
    return sbm([block_size] * 2, block_probs, directed=False, return_labels=True)
def test_no_nans(assortative):
    """Assert that the CASE embedding of this SBM contains only finite values."""
    # this generated a matrix with nan values before
    Y = gen_covariates_beta()

    total_nodes = 1500
    community_size = total_nodes // 3
    p, q = 0.15, 0.3
    # Communities 0 and 1 share probability p with each other; community 2
    # connects to them with q and to itself with p.
    B = np.full((3, 3), p)
    B[:2, 2] = q
    B[2, :2] = q
    A = sbm([community_size] * 3, B)

    # embed
    embedder = CASE(assortative=assortative, n_components=2)
    latents = embedder.fit_transform(A, Y)
    assert np.all(np.isfinite(latents))
def test_SBM_fit_supervised(self):
    """Fit SBMEstimator with known labels and compare block_p_ to the true B."""
    np.random.seed(8888)
    true_B = np.array(
        [
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ]
    )
    block_sizes = np.array([500, 500, 250, 250])
    graph = sbm(block_sizes, true_B, directed=True, loops=False)

    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(graph, y=_n_to_labels(block_sizes))

    # Estimated block probabilities must match the truth to within 0.01.
    assert_allclose(estimator.block_p_, true_B, atol=0.01)
def dcsbm_corr(n, p, r, theta, epsilon1=1e-3, epsilon2=1e-3, directed=False, loops=False):
    """
    Sample a pair of DC-SBM with the same marginal probabilities

    One graph is drawn with ``sbm(n, p, dc=theta)`` and an undirected
    ``DCSBMEstimator`` is fitted to it, using block labels derived from ``n``.
    The fitted probability matrix is clamped to ``[epsilon1, 1 - epsilon2]``
    and passed to ``sample_edges_corr`` together with a constant matrix
    filled with ``r`` and the ``directed`` / ``loops`` flags.

    Returns
    -------
    G1, G2
        The two outputs of ``sample_edges_corr``.
    """
    n_vertices = np.sum(n)
    n_blocks = np.array(n).size
    block_labels = np.repeat(np.arange(0, n_blocks), n)
    corr = r * np.ones((n_vertices, n_vertices))

    # sample a DC-SBM w/ block prob p
    seed_graph = sbm(n, p, dc=theta)

    # fit DC-SBM to the sampled graph to estimate P
    fitted = DCSBMEstimator(directed=False).fit(seed_graph, y=block_labels)
    p_mat = fitted.p_mat_

    # P could be out of range; clamp it in place
    lower = epsilon1
    upper = 1 - epsilon2
    p_mat[p_mat < lower] = lower
    p_mat[p_mat > upper] = upper

    # sample correlated graphs based on P
    G1, G2 = sample_edges_corr(p_mat, corr, directed, loops)
    return G1, G2
from graspologic.simulations import sbm
from graspologic.utils import remap_labels
from graspologic.plot import pairplot
from graspologic.embed import CovariateAssistedEmbedding
import seaborn as sns

# Three equally sized communities.
n = 500
assortative = True
# In the non-assortative setting the two probabilities trade places.
p, q = (0.03, 0.015) if assortative else (0.015, 0.03)
A, labels = sbm([n, n, n], p=[[p, q, q], [q, p, q], [q, q, p]], return_labels=True)

#%%
# X = gen_covariates(labels, m1=0.8, m2=0.2, agreement=0.0)
X = gen_covariates(labels, m1=0.8, m2=0.2, agreement=1)

# NOTE(review): embedding_alg is fixed to "assortative" even though the
# `assortative` flag above can be switched off — confirm this is intended.
case = CovariateAssistedEmbedding(n_components=3, embedding_alg="assortative")
case.fit(A, covariates=X)

#%%
Xhat = case.latent_left_
pairplot(Xhat, labels=labels)

# # def M():
# #     # module scope ensures that A and labels will always match
nca = nearest_common_ancestor(source_node, target_node).name
# NOTE(review): source_node / target_node are not defined in this excerpt —
# presumably loop variables of an enclosing loop that starts above it.
base_prob = probs[nca]
# Jitter the ancestor's probability uniformly by a relative amount of alpha.
spread = alpha * base_prob
new_prob = np.random.uniform(base_prob - spread, base_prob + spread)
i = source_node.name
j = target_node.name
sbm_probs.loc[i, j] = new_prob

from graspologic.utils import symmetrize

sbm_probs = symmetrize(sbm_probs.values)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(sbm_probs, ax=ax)

# %%
# Label of each node: the last non-NaN entry among the first four columns.
node_data = mt.node_data
flat_labels = []
for node, row in node_data.iterrows():
    path = row.values[:4]
    path = path[~np.isnan(path)]
    label = path[-1]
    flat_labels.append(label)
flat_labels = np.array(flat_labels)

#%%
# NOTE: flat_labels is overwritten here by the labels returned from sbm.
A, flat_labels = sbm(n_per_leaf, sbm_probs, directed=False, return_labels=True)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(A, ax=ax)
#%%
import numpy as np
from graspologic.simulations import sbm

# Edge probabilities from which the four block matrices are assembled.
p1, p2, p3, p4 = 0.7, 0.5, 0.1, 0.3

B1 = np.array([[p1, p3], [p3, p1]])  # affinity
B2 = np.array([[p1, p3], [p3, p2]])  # core-periphery
B3 = np.array([[p1, p2], [p2, p1]])  #
B4 = np.array([[p1, p4], [p4, p3]])

n = [50, 50]
# Graphs are sampled in the order A1, A2, A3, A4; only A1 keeps its labels.
A1, labels = sbm(n, B1, return_labels=True)
A2, A3, A4 = (sbm(n, block) for block in (B2, B3, B4))

from graspologic.embed import AdjacencySpectralEmbed
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ortho_group
from graspologic.embed import selectSVD

sns.set_context("talk")

Vs = []
for i in range(10):
    As = [A1, A2, A3, A4]
    Xs = []
def sample_func():
    """Draw one directed, loop-free SBM graph from the enclosing ``ns`` and ``B``."""
    graph = sbm(ns, B, directed=True, loops=False)
    return graph
from graspologic.simulations import sbm # for simplicity, the simulation code generates samples wherein # vertices from the same community are ordered in the vertex set by # their community order. Note that it would be theoretically equivalent to # denote the total number of vertices in each community, or provide # a vector tau with the first 50 entries taking the value 1, and the # second 50 enties taking the value 0, given this fact. ns = [50, 50] n = sum(ns) n = sum(ns) # total number of vertices is the sum of the B = [[.5, .2], [.2, .05]] A = sbm(n=ns, p=B) _=heatmap(A, title="SBM(T, B) Simulation") In the above simulation, we can clearly see an apparent $4$-"block structure", which describes the fact that the probability of an edge existing depends upon which of the $4$ "blocks" the edge falls into. These blocks are the apparent "subgraphs", or square patterns, observed in the above graph. The block structure is clearly delineated by the first $50$ vertices being from a single community, and the second $50$ vertices being from a different community. It is important to note that a graph may be $SBM_n(\vec \tau, \pmb B)$ regardless of whether a block structure is visually discernable. Indeed, the block structure may only be apparent given a particular ordering of the vertices, an otherwise, may not even be discernable at all. Consider, for instance, a similar adjacency matrix to the graph plotted above, with the exact same realization, up to a permutation (reordering) of the vertices. The below graph shows the exact same set of adjacencies as-above, but wherein $\pmb A$ has had its vertices resorted randomly. The graph has an identical block structuure (up to the reordering of the vertices) as the preceding graph illustrated. import numpy as np # generate a permutation of the n vertices vtx_perm = np.random.choice(n, size=n, replace=False) # same adjacency matrix (up to reorder of the vertices) heatmap(A[[vtx_perm]] [:,vtx_perm])
mpl.rcParams[key] = val
# NOTE(review): the statements up to sns.set_context(...) look like the tail
# of a definition that starts above this excerpt; they are left as they were.
context = sns.plotting_context(context=context, font_scale=font_scale, rc=rc_dict)
sns.set_context(context)


set_theme()

#%%
# sbm_corr is used below, so it is imported alongside sbm.
from graspologic.simulations import sbm, sbm_corr

np.random.seed(8888)
p = np.array([[0.7, 0.1], [0.1, 0.7]])
n = [100, 100]
r = 0.9
A1, A2 = sbm_corr(n, p, r, directed=False, loops=False)
# Only the community labels are kept from this extra sample.
_, labels = sbm(n, p, return_labels=True)

from graspologic.plot import heatmap
import networkx as nx

g1 = nx.from_numpy_array(A1)
# heatmap(A1, cbar=False)

# graph embedding
plt.figure()
nodelist = sorted(g1.nodes())
colors = sns.color_palette("deep")
palette = dict(zip([0, 1], colors))
node_colors = list(map(palette.get, labels))
# networkx names this keyword `node_color` (singular).
nx.draw_spring(g1, nodelist=nodelist, node_color=node_colors)

#%%
from giskard.plot import graphplot
# ### A feedforward SBM model # Here we construct a 2-block SBM where the block probabilities are feedforward with an # amount that depends on $\delta$. #%% def construct_feedforward_B(p=0.5, delta=0): B = np.array([[p, p + delta], [p - delta, p]]) return B delta = 0.1 B = construct_feedforward_B(0.5, delta) ns = [15, 15] A, labels = sbm(ns, B, directed=True, loops=False, return_labels=True) fig, axs = plt.subplots(1, 3, figsize=(12, 4)) title = "Block probabilities\n" title += r"$p = $" + f"{p}, " + r"$\delta = $" + f"{delta}" annot = np.array([[r"$p$", r"$p + \delta$"], [r"$p + \delta$", r"$p$"]]) sns.heatmap( B, vmin=0, center=0, vmax=1, cmap="RdBu_r", annot=annot, cbar=False, square=True, fmt="",
def _build_graphs(model, n_features, n_edges, n_communities, probs):
    """Return (graph_train, graph_test, adj_train, adj_test) for ``model``."""
    if model == "ER":
        # Generate a random graph with the Erdos-Renyi model.
        graph_train = graph_test = ig.Graph.Erdos_Renyi(
            n=n_features, m=n_edges * n_features, directed=False
        )
        adj_train = adj_test = np.array(graph_train.get_adjacency().data)
    elif model == "BA":
        # Generate a scale-free graph with the Barabasi-Albert model.
        graph_train = graph_test = ig.Graph.Barabasi(
            n_features, n_edges, directed=False
        )
        adj_train = adj_test = np.array(graph_train.get_adjacency().data)
    elif model == "SBM":
        # Spread any remainder over the first communities so that the graph
        # has exactly n_features nodes.
        base, extra = divmod(n_features, n_communities)
        sizes = [base + (k < extra) for k in range(n_communities)]
        # probs[0] is the intra-community probability (diagonal), probs[1]
        # the inter-community probability (off-diagonal).
        p = np.full((n_communities, n_communities), probs[1], dtype=float)
        np.fill_diagonal(p, probs[0])
        adj_train = sbm(n=sizes, p=p)
        adj_test = sbm(n=sizes, p=p)
        # NOTE(review): Graph.Adjacency is called without a mode argument,
        # unlike the other models which pass directed=False — confirm that
        # the resulting graph has the intended (un)directedness.
        graph_train = ig.Graph.Adjacency(adj_train.tolist())
        graph_test = ig.Graph.Adjacency(adj_test.tolist())
    else:  # "linear": a simple path 0 - 1 - ... - (n_features - 1)
        g = ig.Graph()
        g.add_vertices(n_features)
        g.add_edges([(i, i + 1) for i in range(n_features - 1)])
        graph_train = graph_test = g
        adj_train = np.array(g.get_adjacency().data)
        adj_test = np.array(g.get_adjacency().data)
    return graph_train, graph_test, adj_train, adj_test


def _add_noise(features, noise_level, n_features):
    """Add zero-mean Gaussian noise in place when ``noise_level`` is positive."""
    if noise_level > 0:
        features += np.random.normal(0, noise_level, n_features)
    return features


def _sign_observation(graph, char_features, n_features, noise_level):
    """Draw one observation for the 'sign' method."""
    features = np.random.normal(0, 1, n_features)
    features_next = np.copy(features)
    for f in char_features:
        neighbor_sum = sum(features[neighbor] for neighbor in graph.neighbors(f))
        # Set the sign to the sign of the summed neighbour values.
        features_next[f] = np.sign(neighbor_sum) * np.abs(features[f])
    return _add_noise(features_next, noise_level, n_features)


def _diffusion_observation(graph, char_features, n_features, signal_level,
                           diff_coef, n_iter, noise_level):
    """Draw one observation for the 'diffusion' method."""
    features = np.abs(np.random.normal(0, 1, n_features))
    # Increase the value for the characteristic features.
    features[char_features] += np.abs(
        np.random.normal(signal_level, 1, len(char_features))
    )
    features = features / np.linalg.norm(features)
    # Diffuse values through the graph; every step reads the previous state.
    for _ in range(n_iter):
        features_next = np.copy(features)
        for e in graph.es:
            features_next[e.target] += (features[e.source] - features[e.target]) * diff_coef
            features_next[e.source] += (features[e.target] - features[e.source]) * diff_coef
        features = features_next
    return _add_noise(features, noise_level, n_features)


def _activation_observation(graph, char_features, n_features, signal_level,
                            noise_level):
    """Draw one observation for the 'activation' method."""
    features = np.random.normal(0, 1, n_features)
    features_next = np.copy(features)
    for f in char_features:
        neighbors = graph.neighbors(f)
        neighbor_sum = sum(features[neighbor] for neighbor in neighbors)
        degree = max(len(neighbors), 1)  # avoid dividing by zero
        features_next[f] = np.random.normal(neighbor_sum / degree * signal_level, 0.2)
    return _add_noise(features_next, noise_level, n_features)


def gen_syn_data(
    n_classes=3,
    n_obs_train=200,
    n_obs_test=100,
    n_features=10,
    n_edges=3,
    n_char_features=10,
    signal=(0, 0),
    diff_coef=(0.1, 0.1),
    noise=(0.2, 0.2),
    n_communities=5,
    probs=(0.5, 0.1),
    n_iter=3,
    model='BA',
    syn_method="sign",
    random_seed=1996):
    """
    Generates synthetic training and test datasets based on an underlying
    random graph model.

    Each class is defined by a set of characteristic features (graph nodes)
    drawn at random. For every observation a random feature vector is drawn
    and the values on the characteristic nodes are altered according to
    `syn_method`, using the training graph for training observations and the
    test graph for test observations.

    Parameters
    ----------
    n_classes: int
        Number of classes
    n_obs_train: int
        Number of observations per class for the training dataset
    n_obs_test: int
        Number of observations per class for the test dataset
    n_features: int
        Number of features, each corresponding to a node in the graph
    n_edges: int
        Edge parameter of the graph model: `'BA'` passes it to
        `ig.Graph.Barabasi`, `'ER'` uses `n_edges * n_features` as the total
        number of edges.
    n_char_features: int
        Number of features that are specific to each class
    signal: [float, float]
        The level of initial signal for the characteristic features, for
        training and test dataset respectively. Only used when
        `syn_method == 'diffusion'` or `syn_method == 'activation'`.
        The sequence passed in is never modified.
    diff_coef: [float, float]
        How much each value transmits its value over the edges, for training
        and test dataset respectively. Only used when
        `syn_method == 'diffusion'`.
    noise: [float, float]
        (Gaussian) Noise level added at the end of the information passing,
        for training and test dataset respectively.
    n_communities: int
        Number of graph communities for the Stochastic Block Model. Used only
        when `model == 'SBM'`.
    probs: [float, float]
        Probability of intra and inter cluster edges for the Stochastic Block
        Model. Used only when `model == 'SBM'`.
    n_iter: int
        Number of diffusion steps. Only used when `syn_method == 'diffusion'`.
    model: str
        The random graph generation model. Can be `'BA'` for Barabási–Albert,
        `'ER'` for Erdős–Rényi, `'SBM'` for Stochastic Block Model, or
        `'linear'` for a path graph.
    syn_method: str
        The message passing synthetic process. Can be:
        `'diffusion'` for diffusing information over edges based on the
        difference on the end nodes.
        `'activation'` for activating a characteristic node based on its
        neighbors.
        `'sign'` for changing the sign of a characteristic node based on the
        sign of the sum of its neighbors.
    random_seed: int
        Seed passed to `np.random.seed` before any sampling.

    Returns
    -------
    X_train : a numpy ndarray with (absolute) features generated for the
        training dataset.
    y_train : a numpy ndarray with labels generated for the training dataset.
    adj_train : the adjacency matrix of the graph generated for the training
        dataset.
    X_test : a numpy ndarray with (absolute) features generated for the test
        dataset.
    y_test : a numpy ndarray with labels generated for the test dataset.
    adj_test : the adjacency matrix of the graph generated for the test
        dataset.
    char_feat : dict mapping each class index to the array of its
        characteristic feature indices.

    Raises
    ------
    ValueError
        If `model` or `syn_method` is not one of the supported values.
    """
    # Fail early with a clear error instead of printing and crashing later.
    if model not in ("ER", "BA", "SBM", "linear"):
        raise ValueError(
            "Unrecognized random graph generation model. "
            "Please use ER, BA, linear, or SBM."
        )
    if syn_method not in ("sign", "diffusion", "activation"):
        raise ValueError("Unrecognized synthetic dataset generation method!")

    np.random.seed(random_seed)

    # Work on a private copy: the diffusion method updates the signal levels
    # from class to class and must not leak that into the caller's sequence.
    signal = list(signal)

    graph_train, graph_test, adj_train, adj_test = _build_graphs(
        model, n_features, n_edges, n_communities, probs
    )

    X_train, y_train = [], []
    X_test, y_test = [], []
    char_feat = dict()

    for c in range(n_classes):
        if syn_method == "diffusion":
            # The signal levels drift randomly from one class to the next.
            signal[0] = np.random.normal(signal[0], 1, 1)
            signal[1] = np.random.normal(signal[1], 1, 1)

        # Draw the features which define this class
        char_features = np.random.choice(n_features, size=n_char_features, replace=False)
        char_feat[c] = char_features

        # Index 0 holds the training settings, index 1 the test settings.
        splits = (
            (graph_train, n_obs_train, X_train, y_train),
            (graph_test, n_obs_test, X_test, y_test),
        )
        for k, (graph, n_obs, X, y) in enumerate(splits):
            for _ in range(n_obs):
                if syn_method == "sign":
                    features = _sign_observation(
                        graph, char_features, n_features, noise[k]
                    )
                elif syn_method == "diffusion":
                    features = _diffusion_observation(
                        graph, char_features, n_features, signal[k],
                        diff_coef[k], n_iter, noise[k]
                    )
                else:
                    features = _activation_observation(
                        graph, char_features, n_features, signal[k], noise[k]
                    )
                X.append(features)
                y.append(c)

    # Shuffle the observations. Subtracting 1 maps index 0 to -1 (the last
    # row), so the result is still a valid permutation; it is kept so that
    # seeded outputs do not change.
    train_idx = np.random.permutation(len(y_train)) - 1
    X_train = np.array(X_train).reshape(len(y_train), n_features)[train_idx, :]
    y_train = np.array(y_train)[train_idx]

    test_idx = np.random.permutation(len(y_test)) - 1
    X_test = np.array(X_test).reshape(len(y_test), n_features)[test_idx, :]
    y_test = np.array(y_test)[test_idx]

    return np.absolute(X_train), y_train, adj_train, \
        np.absolute(X_test), y_test, adj_test, char_feat
# Two groups of three communities: 0.6 within a community, 0.3 between
# communities of the same group, 0.1 across groups.
B = np.array([
    [0.6, 0.3, 0.3, 0.1, 0.1, 0.1],
    [0.3, 0.6, 0.3, 0.1, 0.1, 0.1],
    [0.3, 0.3, 0.6, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.6, 0.3, 0.3],
    [0.1, 0.1, 0.1, 0.3, 0.6, 0.3],
    [0.1, 0.1, 0.1, 0.3, 0.3, 0.6],
])
n_per_comm = 100
ns = 6 * [n_per_comm]
# Derived from ns so that changing n_per_comm keeps the shapes consistent.
n_nodes = sum(ns)
n_trials = 20

# Average of the fitted probability matrices over all trials.
probability_matrix = np.zeros((n_nodes, n_nodes))
for _ in range(n_trials):
    adjacency, labels = sbm(ns, B, return_labels=True)
    lt = LeidenTree(trials=5, verbose=False, max_levels=2)
    lt.fit(adjacency)
    lt.estimate_parameters(adjacency)
    probability_matrix += lt.full_probability_matrix.values / n_trials
np.unique(probability_matrix)

#%%
# Uses the tree and adjacency from the last trial.
node_data = lt.node_data
node_data.sort_values(["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))
sorted_adjacency = adjacency[np.ix_(node_data["adjacency_index"], node_data["adjacency_index"])]
fig, ax = plt.subplots(1, 1, figsize=(16, 16))
return pred_labels def compute_mcr(true_labels, pred_labels): confusion = confusion_matrix(labels, pred_labels) row_inds, col_inds = linear_sum_assignment(confusion, maximize=True) mcr = 1 - (np.trace(confusion[row_inds][:, col_inds]) / np.sum(confusion)) return mcr n_replicates = 30 gammas = [24, 36, 48, 60, 64] rows = [] for replicate in range(n_replicates): # sample data adj, labels = sbm(comm_sizes, B, directed=False, return_labels=True) # GMMoASE ase_pred_labels, ase_embedding = spectral_clustering(adj, method="ase", return_embedding=True) ase_pred_labels = remap_labels(labels, ase_pred_labels) mcr = compute_mcr(labels, ase_pred_labels) for gamma in gammas: rows.append({ "mcr": mcr, "method": "GMMoASE", "gamma": gamma, "replicate": replicate })
import pandas as pd import seaborn as sns import matplotlib.pyplot as plt def eig(A): evals, evecs = np.linalg.eig(A) sort_inds = np.argsort(evals) evals = evals[sort_inds] evecs = evecs[:, sort_inds] return evals, evecs #%% B = np.array([[0.8, 0.05], [0.05, 0.8]]) A = sbm([10, 10], B) heatmap(A) #%% sns.set_context("talk") degrees = np.sum(A, axis=0) D = np.diag(degrees) L = D - A evals, evecs = eig(L) fig = plt.figure() sns.scatterplot(y=evals, x=np.arange(len(evals))) #%% rows = [] for p in np.linspace(0, 0.8, 20): for i in range(10):
edgecolor="darkred", linewidth=1, linestyle=":", facecolor="none", ) ax.add_patch(circle) #%% from graspologic.simulations import sample_edges, sbm from graspologic.utils import cartprod import seaborn as sns n_per_comm = 50 B = np.array([[0.8, 0.1, 0.1], [0.1, 0.75, 0.05], [0.1, 0.05, 0.6]]) _, labels = sbm([n_per_comm, n_per_comm, n_per_comm], B, return_labels=True) P = B[np.ix_(labels, labels)] sns.heatmap(P) #%% fig, ax = plt.subplots(1, 1, figsize=(8, 4)) true_eigvals = np.linalg.eigvalsh(P) n_sims = 1000 all_estimated_eigvals = [] for i in range(n_sims): A = sample_edges(P, directed=False, loops=True) estimated_eigvals = np.linalg.eigvalsh(A) all_estimated_eigvals += list(estimated_eigvals) sns.histplot((all_estimated_eigvals), ax=ax, stat='density')
dpi=dpi, bbox_inches=bbox_inches, transparent=transparent, pad_inches=pad_inches, facecolor=facecolor, ) set_theme() n = [15, 15] P1 = [[0.3, 0.1], [0.1, 0.7]] P2 = [[0.3, 0.1], [0.1, 0.3]] np.random.seed(8) G1 = sbm(n, P1) G2 = sbm(n, P2) embedder = OmnibusEmbed(n_components=2) Zhat = embedder.fit_transform([G1, G2]) print(Zhat.shape) Xhat1 = Zhat[0] Xhat2 = Zhat[1] Xhat_full = np.concatenate((Xhat1, Xhat2), axis=0) colors = sns.color_palette("deep") # Plot the points fig, ax = plt.subplots(figsize=(8, 8))
import numpy as np np.random.seed(42) from graspologic.simulations import sbm from graspologic.plot import heatmap # Start with some simple parameters N = 1500 # Total number of nodes n = N // 3 # Nodes per community p, q = .3, .15 B = np.array([[.3, .3, .15], [.3, .3, .15], [.15, .15, .3]]) # Our block probability matrix # Make and visualize our Stochastic Block Model A, labels = sbm([n, n, n], B, return_labels = True) heatmap(A, title="A Stochastic Block Model"); There are three communities (we promise), but the first two are impossible to distinguish between using only our network. The third community is distinct: nodes belonging to it aren't likely to connect to nodes in the first two communities, and are very likely to connect to each other. If we wanted to embed this graph using our Laplacian or Adjacency Spectral Embedding methods, we'd find the first and second communities layered on top of each other. from graspologic.embed import LaplacianSpectralEmbed as LSE from graspologic.utils import to_laplacian import matplotlib.pyplot as plt import seaborn as sns def plot_latents(latent_positions, *, title, labels, ax=None): if ax is None: ax = plt.gca() plot = sns.scatterplot(latent_positions[:, 0], latent_positions[:, 1], hue=labels, palette="Set1", linewidth=0, s=10, ax=ax)