def bilateral_ase(adj):
    """Embed the ipsilateral and contralateral parts of ``adj`` separately.

    The ipsilateral graph is ``adj`` with the left->right and right->left
    blocks zeroed; the contralateral graph is ``adj`` with the left->left and
    right->right blocks zeroed. Each graph is embedded with the same
    ``AdjacencySpectralEmbed`` instance, every array of the returned embedding
    is passed through ``Procrustes`` (fit on the paired seed indices, then
    ``transform`` with ``map_inds=left_inds``), and the results are
    concatenated column-wise.

    Relies on the module-level ``left_inds``, ``right_inds``, ``lp_inds`` and
    ``rp_inds``.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix; it is copied, never modified in place.

    Returns
    -------
    tuple
        ``(align_ipsi_embed, align_contra_embed)``.
    """
    embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2, check_lcc=False)

    def _masked_aligned_embedding(blocks_to_zero):
        # Work on a copy so the caller's adjacency is left untouched.
        masked = adj.copy()
        for rows, cols in blocks_to_zero:
            masked[np.ix_(rows, cols)] = 0
        # One aligner per graph, re-fit for each array of the embedding.
        aligner = Procrustes()
        aligned = []
        for side in embedder.fit_transform(masked):
            aligner.fit(side, x_seeds=lp_inds, y_seeds=rp_inds)
            aligned.append(aligner.transform(side, map_inds=left_inds))
        return np.concatenate(aligned, axis=1)

    ipsi = _masked_aligned_embedding(
        [(left_inds, right_inds), (right_inds, left_inds)]
    )
    contra = _masked_aligned_embedding(
        [(left_inds, left_inds), (right_inds, right_inds)]
    )
    return ipsi, contra
def ase(adj, n_components, ptr=True):
    """Adjacency spectral embedding of ``adj`` as a single 2-D array.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix to embed.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.
    ptr : bool, default True
        If True, ``adj`` is first transformed with ``pass_to_ranks``.

    Returns
    -------
    np.ndarray
        The latent positions. When the embedder returns a tuple of arrays
        they are concatenated along the last axis; a single array is
        returned unchanged.
    """
    if ptr:
        adj = pass_to_ranks(adj)
    # Named `embedder` so the local does not shadow this function's own name.
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    latent = embedder.fit_transform(adj)
    # Only a tuple may be concatenated: np.concatenate on a single 2-D array
    # would treat its rows as the sequence and flatten it into one vector.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    return latent
class ASEEmbedding(Embedding):
    """Implements an interface for adjacency spectral embedding; inherits from
    the Embedding class.

    After ``fit`` / ``learn_embedding``:

    * ``self.H`` holds the embedding (optionally with side features appended),
    * ``self.y`` holds Gaussian-mixture cluster labels for the rows of ``H``.
    """

    def __init__(self):
        self.model = AdjacencySpectralEmbed()

    def _embed_and_cluster(self, X, S=None):
        """Embed ``X``, append ``S`` if given, cluster, and store ``H``/``y``."""
        latent = self.model.fit_transform(X)
        # A tuple of arrays is stacked column-wise; a single array is used
        # as-is (np.hstack on one 2-D array would flatten it to 1-D and make
        # the ``shape[1]`` lookup below fail).
        if isinstance(latent, tuple):
            Xh = np.hstack(latent)
        else:
            Xh = np.asarray(latent)
        if S is not None:
            Xh = np.hstack((Xh, S))
        # Half as many mixture components as columns, but never fewer than
        # one (a single-column embedding would otherwise request 0).
        clusterer = GaussianMixture(n_components=max(1, Xh.shape[1] // 2))
        clusterer.fit(Xh)
        self.y = clusterer.predict(Xh)
        self.H = Xh

    def fit(self, X, S=None):
        """Embed the adjacency matrix ``X`` and cluster the result.

        Parameters
        ----------
        X : array-like
            Adjacency matrix.
        S : array-like, optional
            Extra per-node features, appended column-wise to the embedding
            before clustering.
        """
        self._embed_and_cluster(X, S)

    def learn_embedding(self, G, S=None, **kwargs):
        """Same as ``fit`` but takes a networkx graph ``G``.

        ``**kwargs`` is accepted and ignored.
        """
        # toarray() gives a plain ndarray; todense() would give np.matrix.
        X = nx.adjacency_matrix(G).toarray()
        self._embed_and_cluster(X, S)

    def get_reconstructed_adj(self, *a, **b):
        """Return ``latent_left_ @ diag(singular_values_) @ latent_right_.T``.

        Positional and keyword arguments are accepted and ignored.
        """
        # NOTE(review): this needs `latent_right_` to be populated on the
        # fitted model -- confirm behaviour for undirected inputs.
        scaled_left = self.model.latent_left_.dot(
            np.diag(self.model.singular_values_)
        )
        return scaled_left.dot(self.model.latent_right_.T)
def evaluate_models(graph, labels=None, title=None, plot_graphs=False, min_comp=0, max_comp=1, n_comp=5):
    """Fit several graph models to ``graph`` and save a latent plot for each.

    The models are ER, SBM, DCSBM and a rank-6 RDPG. For every fitted model
    the BIC and score are computed, the model's ``p_mat_`` is embedded into two
    dimensions, and the scatterplot is saved to ``"<name>latent.png"``.

    Parameters
    ----------
    graph : array-like
        Graph to fit.
    labels : array-like, optional
        Passed as ``y`` to each model's ``fit`` and used for the heatmaps of
        the fitted models.
    plot_graphs : bool
        If True, heatmaps of the input graph and of each model's ``p_mat_``
        and a sample are drawn.
    title, min_comp, max_comp, n_comp
        Accepted but not used in the body.

    Returns
    -------
    None
        ``bics`` and ``log_likelihoods`` are collected but not returned.
    """
    # NOTE(review): `cell_labels` is read from module scope here and in the
    # scatterplot below, while the rest of the function uses the `labels`
    # argument -- confirm this is intended.
    if plot_graphs:
        heatmap(graph, inner_hier_labels=cell_labels)
    ## Set up models to test
    non_rdpg_models = [
        EREstimator(fit_degrees=False),
        SBEstimator(fit_degrees=False),
        SBEstimator(fit_degrees=True),
    ]
    d = [6]  # RDPG ranks to try
    rdpg_models = [RDPGEstimator(n_components=i) for i in d]
    models = non_rdpg_models + rdpg_models
    names_nonRDPG = ["ER", "SBM", "DCSBM"]
    names_RDPG = ["RDPGrank{}".format(i) for i in d]
    names = names_nonRDPG + names_RDPG
    bics = []
    log_likelihoods = []
    ## Test models
    # NOTE(review): the nesting below (everything up to plt.close() inside the
    # loop) is the most plausible reading of the original -- confirm.
    for model, name in zip(models, names):
        m = model.fit(graph, y=labels)
        if plot_graphs:
            heatmap(m.p_mat_, inner_hier_labels=labels, title=(name + "P matrix"))
            heatmap(m.sample(), inner_hier_labels=labels, title=(name + "sample"))
        bic = m.bic(graph)
        log_likelihoods.append(m.score(graph))
        bics.append(bic)
        plt.show()
        # Embed the fitted probability matrix into two dimensions for plotting.
        ase = AdjacencySpectralEmbed(n_components=2)
        latent = ase.fit_transform(m.p_mat_)
        # if type(latent) is tuple:
        #     pairplot(np.concatenate((latent[0], latent[1]), axis=1))
        #     plt.show()
        # else:
        print("here")  # leftover debug output
        # plt.figure(figsize=(20, 20))
        ax = scatterplot(latent, labels=cell_labels, height=4, alpha=0.6, font_scale=1.25)
        # plt.suptitle(name, y=0.94, x=0.1, fontsize=30, horizontalalignment="left")
        plt.savefig(name + "latent.png", format="png", dpi=1000)
        plt.close()
def lse(adj, n_components, regularizer=None):
    """Spectral embedding of the regularized Laplacian (``R-DAD``) of ``adj``.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.
    regularizer : optional
        Forwarded to ``to_laplace``. Previously this argument was accepted
        but silently ignored; the default ``None`` behaves as before.

    Returns
    -------
    np.ndarray
        Latent positions; a tuple returned by the embedder is concatenated
        along the last axis, a single array is returned unchanged.

    Notes
    -----
    Pass-to-ranks is applied when the module-level flag ``PTR`` is truthy.
    """
    if PTR:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    latent = embedder.fit_transform(lap)
    # Concatenating a single 2-D array would flatten it; only join tuples.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    return latent
def level(adj, meta, pred, reembed=False, X=None, R=None, plot_all=True):
    """Run cross-validated clustering inside every predicted cluster.

    For each unique value in ``pred`` the matching rows of ``meta`` and the
    matching sub-block of ``adj`` are extracted, an embedding for those nodes
    is obtained (re-embedded from the sub-adjacency when ``reembed`` is True,
    otherwise sliced from ``X``), ``crossval_cluster`` is run on it, and a
    metrics figure is saved through ``stashfig``.

    Parameters
    ----------
    adj : np.ndarray
        Full adjacency matrix.
    meta : pandas.DataFrame
        Node metadata with boolean ``"left"`` and ``"right"`` columns.
    pred : array-like
        Cluster label of every node.
    reembed : bool
        Whether to re-embed each sub-adjacency.
    X : np.ndarray, optional
        Existing embedding, used when ``reembed`` is False.
    R : optional
        Only bound to a local when ``reembed`` is False; not used afterwards.
    plot_all : bool
        Forwarded to ``plot_metrics``.

    Returns
    -------
    tuple
        ``(all_sub_results, sub_data)``: the per-cluster clustering results
        and a list of dicts describing the data used for each cluster.
    """
    all_sub_results = []
    sub_data = []

    for label in np.unique(pred):
        print(label)
        print()

        in_cluster = pred == label
        sub_meta = meta[in_cluster].copy()
        # Positional indices local to this cluster.
        sub_meta["inds"] = range(len(sub_meta))
        sub_left_inds = sub_meta[sub_meta["left"]]["inds"].values
        sub_right_inds = sub_meta[sub_meta["right"]]["inds"].values
        sub_lp_inds, sub_rp_inds = get_paired_inds(sub_meta)
        sub_adj = adj[np.ix_(in_cluster, in_cluster)]

        if not reembed:
            sub_X = X[in_cluster].copy()
            sub_R = R
        else:
            # TODO look into PTR at this level as well
            sub_embed = AdjacencySpectralEmbed().fit_transform(sub_adj)
            sub_X = np.concatenate(sub_embed, axis=1)
            # Rotate the left nodes using the map learned on the known pairs.
            sub_R, _ = orthogonal_procrustes(sub_X[sub_lp_inds], sub_X[sub_rp_inds])
            sub_X[sub_left_inds] = sub_X[sub_left_inds] @ sub_R

        sub_data.append(
            {
                "meta": sub_meta,
                "left_inds": sub_left_inds,
                "right_inds": sub_right_inds,
                "left_pair_inds": sub_lp_inds,
                "right_pair_inds": sub_rp_inds,
                "X": sub_X,
                "adj": sub_adj,
                "reembed": reembed,
            }
        )

        sub_results = crossval_cluster(
            sub_X,
            sub_left_inds,
            sub_right_inds,
            left_pair_inds=sub_lp_inds,
            right_pair_inds=sub_rp_inds,
            max_clusters=8,
            min_clusters=1,
            n_init=50,
        )

        fig, axs = plot_metrics(sub_results, plot_all=plot_all)
        fig.suptitle(f"Clustering for cluster {label}, reembed={reembed}")
        stashfig(f"cluster-profile-label={label}-reembed={reembed}")
        plt.close()

        all_sub_results.append(sub_results)

    return all_sub_results, sub_data
def normalized_ase(graph, n_components=None, embed_kws=None):
    """Adjacency spectral embedding with every row scaled to unit length.

    Parameters
    ----------
    graph : array-like
        Graph to embed.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.
    embed_kws : dict, optional
        Extra keyword arguments for ``AdjacencySpectralEmbed``. ``None``
        (the default) means no extra arguments.

    Returns
    -------
    np.ndarray
        The embedding with each row divided by its Euclidean norm. Rows whose
        norm is zero are left as zeros.
    """
    # Avoid a mutable default argument shared between calls.
    if embed_kws is None:
        embed_kws = {}
    embedder = AdjacencySpectralEmbed(n_components=n_components, **embed_kws)
    latent = embedder.fit_transform(graph)
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    norm_vec = np.linalg.norm(latent, axis=1)
    # Dividing by 1 keeps all-zero rows at zero instead of producing NaN.
    norm_vec[norm_vec == 0] = 1
    norm_latent = latent / norm_vec[:, np.newaxis]
    return norm_latent
def get(n=50):
    """Sample two 2-block SBM graphs and return their spectral embeddings.

    Both graphs have two blocks of ``n`` vertices and the same block
    probability matrix ``[[.9, .1], [.1, .9]]``.

    Returns
    -------
    tuple
        ``(X1, X2)``, the ``AdjacencySpectralEmbed().fit_transform`` output
        for the first and second graph.
    """
    block_sizes = [n, n]
    block_probs = [np.array([[.9, .1], [.1, .9]]) for _ in range(2)]
    # Both graphs are sampled before either is embedded.
    graphs = [sbm(block_sizes, probs) for probs in block_probs]
    X1, X2 = [AdjacencySpectralEmbed().fit_transform(graph) for graph in graphs]
    return X1, X2
def ase_concatenate(adjs, n_components, ptr=True):
    """Embed each graph in ``adjs`` separately and join the embeddings.

    Parameters
    ----------
    adjs : sequence of array-like
        Adjacency matrices over the same vertex set.
    n_components : int
        Total embedding budget; each graph is embedded with
        ``n_components // len(adjs)`` components.
    ptr : bool, default True
        If True, every graph is first transformed with ``pass_to_ranks``.

    Returns
    -------
    np.ndarray
        The per-graph embeddings concatenated along the last axis.
    """
    if ptr:
        adjs = [pass_to_ranks(a) for a in adjs]
    embedder = AdjacencySpectralEmbed(n_components=n_components // len(adjs))
    graph_latents = []
    for a in adjs:
        latent = embedder.fit_transform(a)
        # Only join when the embedder returned several arrays; concatenating
        # a single 2-D array would flatten it into one long vector.
        if isinstance(latent, tuple):
            latent = np.concatenate(latent, axis=-1)
        graph_latents.append(latent)
    return np.concatenate(graph_latents, axis=-1)
def lse(adj, n_components, regularizer=None, ptr=True):
    """Spectral embedding of the regularized Laplacian (``R-DAD``) of ``adj``.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.
    regularizer : optional
        Forwarded to ``to_laplace``.
    ptr : bool, default True
        If True, ``adj`` is first transformed with ``pass_to_ranks``.

    Returns
    -------
    np.ndarray
        Latent positions; a tuple returned by the embedder is concatenated
        along the last axis, a single array is returned unchanged.
    """
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    # The Laplacian is embedded directly, with diagonal augmentation disabled.
    embedder = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    latent = embedder.fit_transform(lap)
    # Concatenating a single 2-D array would flatten it; only join tuples.
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    return latent
def ase_procrust_svd(embed_adjs):
    """Embed several graphs, align left to right, and reduce with an SVD.

    Each adjacency in ``embed_adjs`` is embedded, its embedding arrays are
    concatenated column-wise, and the rows at ``left_inds`` are multiplied by
    the orthogonal Procrustes solution computed from the paired rows
    (``lp_inds`` -> ``rp_inds``). All aligned embeddings are concatenated
    column-wise and passed to ``selectSVD`` with ``n_elbows=3``.

    Relies on the module-level ``left_inds``, ``lp_inds`` and ``rp_inds``.
    The shape of each intermediate embedding is printed.

    Returns
    -------
    The first of the three values returned by ``selectSVD``.
    """
    embedder = AdjacencySpectralEmbed(n_components=None)
    aligned_embeds = []
    for graph in embed_adjs:
        joined = np.concatenate(embedder.fit_transform(graph), axis=1)
        rotation = orthogonal_procrustes(joined[lp_inds], joined[rp_inds])[0]
        joined[left_inds] = joined[left_inds] @ rotation
        print(joined.shape)
        aligned_embeds.append(joined)

    cat_embed = np.concatenate(aligned_embeds, axis=1)
    print(cat_embed.shape)
    left_vectors, _singular_values, _right_vectors = selectSVD(cat_embed, n_elbows=3)
    return left_vectors
def embed_ase(*, adj, n_components=None):
    """Adjacency spectral embedding of ``adj`` with otherwise default settings.

    Parameters
    ----------
    adj
        Object exposing ``toarray()``; the resulting array is what gets
        embedded.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.

    Returns
    -------
    np.ndarray
        The embedding; when the embedder returns a tuple of arrays they are
        stacked column-wise.
    """
    dense_adj = adj.toarray()
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    X_ase = embedder.fit_transform(dense_adj)
    return np.column_stack(X_ase) if isinstance(X_ase, tuple) else X_ase
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Collapse an adjacency and a partition into an annotated block graph.

    The block-level matrix comes from ``get_blockmodel_df``. The returned
    ``nx.DiGraph`` has one node per block, annotated with the attributes
    ``"Size"``, ``"Signal Flow"``, ``"AdjEvec-0"`` .. ``"AdjEvec-9"``,
    ``"Spring-x"``, ``"Spring-y"`` and ``"Color"``.

    Parameters
    ----------
    adj : array-like
        Adjacency matrix.
    labels : array-like
        Block label of every vertex.
    drop_neg : bool
        If True and a block labelled ``"-1"`` exists, drop its row and column.
    remove_diag : bool
        If True, zero the diagonal of the block matrix.
    size_scaler : number
        Multiplier applied to the block vertex counts for ``"Size"``.
    use_counts, use_weights : bool
        Forwarded to ``get_blockmodel_df``.
    color_map : dict, optional
        Node -> color; defaults to pairing labels with ``cc.glasbey_light``.

    Returns
    -------
    nx.DiGraph
    """
    # convert the adjacency and a partition to a minigraph based on SBM probs
    prob_df = get_blockmodel_df(
        adj, labels, return_counts=use_counts, use_weights=use_weights
    )
    if drop_neg and ("-1" in prob_df.index):
        for axis in (0, 1):
            prob_df.drop("-1", axis=axis, inplace=True)
    if remove_diag:
        # In-place edit of the frame's value array, kept exactly as before.
        adj = prob_df.values
        adj -= np.diag(np.diag(adj))
        prob_df.data = prob_df
    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
    uni_labels, counts = np.unique(labels, return_counts=True)

    # size attribute, based on the number of vertices per block
    nx.set_node_attributes(
        g, dict(zip(uni_labels, size_scaler * counts)), name="Size"
    )

    # signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    flow_by_label = dict(zip(uni_labels, signal_flow(mini_adj)))
    nx.set_node_attributes(g, flow_by_label, name="Signal Flow")

    # spectral properties of the symmetrized minigraph
    n_components = 10
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    latent = embedder.fit_transform(symmetrize(mini_adj))
    for dim in range(n_components):
        dim_by_label = dict(zip(uni_labels, latent[:, dim]))
        nx.set_node_attributes(g, dim_by_label, name=f"AdjEvec-{dim}")

    # spring layout coordinates
    pos = nx.spring_layout(g)
    spring_x = {node: xy[0] for node, xy in pos.items()}
    spring_y = {node: xy[1] for node, xy in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
def _embed(self, A1, A2):
    """Embed two graphs with a shared number of components.

    When ``self.n_components`` is None it is set (and stored on ``self``) to
    the larger of the two values obtained from ``select_dimension`` on each
    graph.

    Returns
    -------
    tuple
        ``(X1_hat, X2_hat)``. If both embeddings come back as tuples, each is
        concatenated along the last axis.

    Raises
    ------
    ValueError
        If exactly one of the two embeddings is a tuple.
    """
    if self.n_components is None:
        candidate_dims = [select_dimension(A)[0][-1] for A in (A1, A2)]
        self.n_components = max(candidate_dims)

    embedder = AdjacencySpectralEmbed(n_components=self.n_components)
    X1_hat = embedder.fit_transform(A1)
    X2_hat = embedder.fit_transform(A2)

    first_is_tuple = isinstance(X1_hat, tuple)
    second_is_tuple = isinstance(X2_hat, tuple)
    if first_is_tuple != second_is_tuple:
        msg = ("input graphs do not have same directedness. "
               "consider symmetrizing the directed graph.")
        raise ValueError(msg)
    if first_is_tuple:
        X1_hat = np.concatenate(X1_hat, axis=-1)
        X2_hat = np.concatenate(X2_hat, axis=-1)
    return X1_hat, X2_hat
def estimate_assignments(graph, n_communities, n_components=None, method="gc", metric=None):
    """Estimate vertex community assignments by clustering a spectral embedding.

    Not deterministic. Does not use graph BIC or MSE to pick the best model.

    1. Embeds a copy of the raw graph with ``AdjacencySpectralEmbed``.
    2. Clusters the embedding into exactly ``n_communities`` groups.
    3. Returns the assignments with the number of parameters reported by the
       clustering method.

    Parameters
    ----------
    graph : array-like
        Graph to embed; it must expose ``copy()``.
    n_communities : int
        Number of clusters to fit.
    n_components : int or None
        Forwarded to ``AdjacencySpectralEmbed``.
    method : str
        "gc" : ``GaussianCluster`` with ``covariance_type="all"``
        "bc" : ``brute_cluster`` with its defaults
        "bc-metric" : ``brute_cluster`` with the custom ``metric``
        "bc-none" : ``brute_cluster`` with affinity "none", linkage "none" and
            full covariance; mostly for testing
    metric : optional
        Only used when ``method == "bc-metric"``.

    Returns
    -------
    tuple
        ``(vertex_assignments, n_params)``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the values above.
    """
    embedder = AdjacencySpectralEmbed(n_components=n_components)
    latent = embedder.fit_transform(graph.copy())
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=1)

    if method == "gc":
        gc = GaussianCluster(
            min_components=n_communities,
            max_components=n_communities,
            covariance_type="all",
        )
        vertex_assignments = gc.fit_predict(latent)
        n_params = gc.model_._n_parameters()
        return (vertex_assignments, n_params)

    # Every remaining method is brute_cluster with different keyword arguments.
    if method == "bc":
        brute_kws = {}
    elif method == "bc-metric":
        brute_kws = {"metric": metric}
    elif method == "bc-none":
        brute_kws = {
            "affinities": ["none"],
            "linkages": ["none"],
            "covariance_types": ["full"],
        }
    else:
        raise ValueError("Unspecified clustering method")

    vertex_assignments, n_params = brute_cluster(latent, [n_communities], **brute_kws)
    return (vertex_assignments, n_params)
def _embed(self, adj=None):
    """Embed ``adj`` and align the left rows to the right ones.

    The adjacency is passed through ``pass_to_ranks`` and embedded with the
    method named by ``self.embed`` (``"ase"``, ``"lse"`` or
    ``"unscaled_ase"``). The embedding arrays are concatenated column-wise,
    an orthogonal Procrustes rotation is learned from the paired rows
    (``self.left_pair_inds`` -> ``self.right_pair_inds``) and applied to the
    rows at ``self.left_inds``.

    Parameters
    ----------
    adj : array-like, optional
        Adjacency matrix; defaults to ``self.adj``.

    Returns
    -------
    np.ndarray
        The aligned embedding. If ``self.normalize`` is truthy every row is
        divided by its Euclidean norm.

    Raises
    ------
    ValueError
        If ``self.embed`` is not a recognised method name.
    """
    if adj is None:
        adj = self.adj
    # TODO look into PTR at this level as well
    lp_inds = self.left_pair_inds
    rp_inds = self.right_pair_inds

    embed_adj = pass_to_ranks(adj)
    if self.embed == "ase":
        embedder = AdjacencySpectralEmbed(
            n_components=self.n_components, n_elbows=self.n_elbows
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "lse":
        embedder = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        )
        embed = embedder.fit_transform(embed_adj)
    elif self.embed == "unscaled_ase":
        # embed_adj already holds pass_to_ranks(adj); no need to recompute it.
        embed_adj = augment_diagonal(embed_adj)
        embed = selectSVD(
            embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
        )
        # Keep the singular vectors only (first and transposed third output).
        embed = (embed[0], embed[2].T)
    else:
        # Previously this fell through to a NameError on `embed` below.
        raise ValueError(f"Unknown embedding method: {self.embed!r}")

    X = np.concatenate(embed, axis=1)

    fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")

    R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
    X[self.left_inds] = X[self.left_inds] @ R

    if self.normalize:
        # Scale by the row norm. Dividing by the row *sum* (the old
        # behaviour) is not a normalization for signed coordinates: the sum
        # can be zero or negative.
        row_norms = np.linalg.norm(X, axis=1)
        X /= row_norms[:, None]
    return X
def test_passing_embeddings(self):
    """LatentDistributionTest(input_graph=False) validates raw embeddings.

    Malformed embeddings must raise ``ValueError`` or ``TypeError`` as listed
    below; a well-formed pair must run without raising.
    """
    np.random.seed(123)
    A1 = er_np(20, 0.8)
    A2 = er_np(20, 0.8)
    X1 = AdjacencySpectralEmbed(n_components=2).fit_transform(A1)
    X2 = AdjacencySpectralEmbed(n_components=2).fit_transform(A2)
    X3 = AdjacencySpectralEmbed(n_components=1).fit_transform(A2)

    def run(first, second):
        # A fresh test object for every call, as in each original case.
        ldt = LatentDistributionTest(input_graph=False)
        return ldt.fit_predict(first, second)

    shape_and_dim_cases = [
        # embeddings having weird ndim
        (X1, X2.reshape(-1, 1, 1)),
        (X1.reshape(-1, 1, 1), X2),
        # embeddings having mismatching number of components
        (X1, X3),
        (X3, X1),
    ]
    for first, second in shape_and_dim_cases:
        with self.assertRaises(ValueError):
            run(first, second)

    # passing weird stuff as input (caught by us)
    wrong_type_cases = [
        ("hello there", X1),
        (X1, "hello there"),
        ({"hello": "there"}, X1),
        (X1, {"hello": "there"}),
    ]
    for first, second in wrong_type_cases:
        with self.assertRaises(TypeError):
            run(first, second)

    # passing infinite in input (caught by check_array)
    X1_w_inf = X1.copy()
    X1_w_inf[1, 1] = np.inf
    with self.assertRaises(ValueError):
        run(X1_w_inf, X2)

    # the appropriate input works
    run(X1, X2)
def _embed(self, adj=None):
    """Embed ``adj``, align it via ``self._procrustes`` and optionally normalize.

    The adjacency is passed through ``pass_to_ranks``; when ``self.plus_c`` is
    truthy the constant ``1 / adj.size`` is added to every entry. The result is
    embedded with the method named by ``self.embed`` (``"ase"``, ``"lse"`` or
    ``"unscaled_ase"``) and the embedding arrays are concatenated column-wise.

    Parameters
    ----------
    adj : array-like, optional
        Adjacency matrix; defaults to ``self.adj``.

    Returns
    -------
    np.ndarray
        The output of ``self._procrustes``; if ``self.normalize`` is truthy
        every row is divided by its Euclidean norm.
    """
    adj = self.adj if adj is None else adj
    n_left_paired = len(self.left_pair_inds)
    n_right_paired = len(self.right_pair_inds)

    ranked = pass_to_ranks(adj)  # TODO PTR here?
    if self.plus_c:
        ranked += 1 / adj.size

    method = self.embed
    if method == "ase":
        embed = AdjacencySpectralEmbed(
            n_components=self.n_components, n_elbows=self.n_elbows
        ).fit_transform(ranked)
    elif method == "lse":
        embed = LaplacianSpectralEmbed(
            n_components=self.n_components,
            n_elbows=self.n_elbows,
            regularizer=self.regularizer,
        ).fit_transform(ranked)
    elif method == "unscaled_ase":
        ranked = augment_diagonal(ranked)
        svd_out = selectSVD(
            ranked, n_components=self.n_components, n_elbows=self.n_elbows
        )
        # Keep the first output and the transposed third output only.
        embed = (svd_out[0], svd_out[2].T)

    X = np.concatenate(embed, axis=1)

    fraction_paired = (n_left_paired + n_right_paired) / len(self.root_inds)
    print(f"Learning transformation with {fraction_paired} neurons paired")

    X = self._procrustes(X)

    if self.normalize:
        X /= np.linalg.norm(X, axis=1)[:, None]
    return X
def add_attributes(
    g,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Annotate the nodes of ``g`` in place with layout and display attributes.

    Adds ``"AdjEvec-0"`` .. ``"AdjEvec-9"`` (spectral embedding of the
    symmetrized adjacency), ``"Spring-x"`` / ``"Spring-y"`` (spring layout),
    ``"color"`` (looked up in ``CLASS_COLOR_DICT`` from each node's
    ``"cell_class"``) and ``"Size"`` (weighted degree in ``g``).

    Parameters
    ----------
    g : networkx graph
        Graph to annotate; every node needs a ``"cell_class"`` attribute.
    drop_neg, remove_diag, size_scaler, use_counts, use_weights, color_map
        Accepted for signature compatibility; not used in the body.

    Returns
    -------
    The same graph ``g``.
    """
    nodelist = list(g.nodes())

    # spectral properties
    sym_adj = symmetrize(nx.to_numpy_array(g, nodelist=nodelist))
    n_components = 10
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        dim_map = dict(zip(nodelist, latent[:, i]))
        nx.set_node_attributes(g, dim_map, name=f"AdjEvec-{i}")

    # spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {node: xy[0] for node, xy in pos.items()}
    spring_y = {node: xy[1] for node, xy in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # colors, keyed on each node's cell class
    for _node, data in g.nodes(data=True):
        data["color"] = CLASS_COLOR_DICT[data["cell_class"]]

    # Size from the weighted degree of the graph being annotated. This used
    # to read the module-level `path_graph`, which silently tied the result
    # to a global instead of the `g` argument.
    size_map = dict(g.degree(weight="weight"))
    nx.set_node_attributes(g, size_map, name="Size")

    return g
def mc_iter(n, m, p, q, tilde, i=1):
    """Run one Monte Carlo iteration of the latent distribution test.

    Two ER graphs are sampled, one with ``n`` vertices and edge probability
    ``p*p`` and one with ``m`` vertices and edge probability ``q*q``. Each is
    embedded into one dimension and the embeddings are compared with
    ``LatentDistributionTest``.

    Parameters
    ----------
    n, m : int
        Number of vertices of the first and second graph.
    p, q : float
        Values whose squares are the edge probabilities.
    tilde : bool
        If truthy, the embeddings are passed through ``sample_noisy_points``
        before testing.
    i : int
        Not used in the body.

    Returns
    -------
    The value returned by ``LatentDistributionTest().fit``.
    """
    def _sample_and_embed(n_verts, latent_position):
        graph = er_np(n_verts, latent_position * latent_position)
        return AdjacencySpectralEmbed(n_components=1).fit_transform(graph)

    # The first graph is sampled and embedded before the second is sampled.
    X = _sample_and_embed(n, p)
    Y = _sample_and_embed(m, q)

    X_new, Y_new = sample_noisy_points(X, Y) if tilde else (X, Y)

    tester = LatentDistributionTest()
    return tester.fit(X_new, Y_new, pass_graph=False)
# close to what we set originally if we undo the rescaling step. # %% double checking on model params sbme = SBMEstimator(directed=False, loops=False) sbme.fit(adj, y=labels) block_p_hat = sbme.block_p_ block_heatmap(block_p_hat, title=r"Observed $\hat{B}$") block_p_hat_unscaled = block_p_hat * 1 / scaling_factor block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)") # %% [markdown] # ## Spectral embedding # Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the # SVDs here. There is an option on whether to throw out the first eigenvector. #%% embeddings embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False) ase = AdjacencySpectralEmbed(**embed_kws) lse = LaplacianSpectralEmbed(form="DAD", **embed_kws) rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws) ase_embed = ase.fit_transform(adj) lse_embed = lse.fit_transform(adj) rlse_embed = rlse.fit_transform(adj) embeddings_list = [ase_embed, lse_embed, rlse_embed] remove_first = True for i, embedding in enumerate(embeddings_list): if remove_first: embeddings_list[i] = embedding[:, 1:] else: embeddings_list[i] = embedding[:, :k]
stashfig("random-sf")
# Record the signal flow computed so far (`true_z`), tagged by input and block.
adj_df = pd.DataFrame()
adj_df["Signal flow"] = true_z
adj_df["Input"] = "Adjacency"
adj_df["Block"] = labels
# fg = sns.FacetGrid(dist_df, col="Label", col_wrap=2, aspect=2, hue="Label")
# fg.map(sns.distplot, "Signal flow")
# stashfig("sf-dists")
# %% [markdown]
# # try with p_hat
from graspy.embed import AdjacencySpectralEmbed
# The embedding is indexed as a pair below, so `A` is expected to yield a
# two-array embedding here.
latent = AdjacencySpectralEmbed(n_components=n_blocks).fit_transform(A)
P_hat = latent[0] @ latent[1].T
# P_hat -= P_hat.min()
heatmap(P_hat, title=r"$\hat{P}$")
stashfig("p-hat")
# `true_z` is rebound here to the signal flow of P_hat.
true_z = signal_flow(P_hat)
sort_inds = np.argsort(true_z)[::-1]  # descending signal flow
heatmap(
    A[np.ix_(sort_inds, sort_inds)],
    cbar=False,
    title=r"Feedforward SBM, sorted by $\hat{P}$ signal flow",
)
stashfig("ffSBM-phat-sf")
# Flattened copy of A with its entries shuffled in place.
A_fake = A.copy().ravel()
np.random.shuffle(A_fake)
def simulation(n, pi, normal_params, beta_params, cond_ind=True, errors=None, smooth=False, acorn=None):
    """Run one two-class simulation and append six classifier error rates.

    Nodes are split into two classes by a multinomial draw, per-node features
    ``Z`` are drawn with ``beta_sampler``, and a second view ``X`` is either
    sampled from normals or obtained as the spectral embedding of an SBM
    graph. Roughly 10% of each class is used as labelled seeds.

    Parameters
    ----------
    n : int
        Total number of nodes.
    pi : float
        Probability of the first class.
    normal_params : list or np.ndarray
        A list selects the normal sampler. A symmetric 2-D array is treated
        as an SBM block probability matrix.
    beta_params
        Forwarded to ``beta_sampler``.
    cond_ind : bool
        If True, ``X`` is sampled independently of ``Z``; otherwise ``X``
        depends on ``Z``.
    errors : list of list, optional
        Six lists to append the error rates to. A fresh one is created when
        None.
    smooth : bool
        If True, ``classify`` is called with ``m=m``.
    acorn : int, optional
        Random seed; drawn at random when None.

    Returns
    -------
    list of list
        ``errors`` with one value appended to each entry, in the order:
        RF posterior, KNN posterior, QDA, RF on [X, Z], KNN on [X, Z], GCN.

    Raises
    ------
    ValueError
        If ``normal_params`` is a 2-D array that is not symmetric.
    TypeError
        If ``normal_params`` is an array that is not 2-D, or neither a list
        nor an array.
    """
    #- Type checks
    if isinstance(normal_params, list):
        sbm_check = False  # there are other checks to do..
    elif isinstance(normal_params, np.ndarray):
        # `==`, not `is`: identity comparison against an int literal only
        # works by accident of small-int caching.
        if normal_params.ndim == 2:
            if np.array_equal(normal_params, normal_params.T):
                sbm_check = True
            else:
                msg = 'if normal_params is a 2 dimensional array it must be symmetric'
                raise ValueError(msg)
        else:
            msg = 'if normal_params is an array, it must be a 2 dimensional array'
            raise TypeError(msg)
    else:
        msg = 'normal_params must be either a list or a 2 dimensional array'
        raise TypeError(msg)

    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    #- Multinomial trial
    counts = np.random.multinomial(n, [pi, 1 - pi])

    #- Hard code the number of blocks
    K = 2

    #- Set labels
    labels = np.concatenate((np.zeros(counts[0]), np.ones(counts[1])))

    #- number of seeds = n_{i}/10
    n_seeds = np.round(0.1 * counts).astype(int)

    #- Set training and test data: the first n_seeds[k] nodes of each class
    class_train_idx = [
        range(np.sum(counts[:k]), np.sum(counts[:k]) + n_seeds[k])
        for k in range(K)
    ]
    train_idx = np.concatenate((class_train_idx)).astype(int)
    # Set membership keeps this linear in n.
    train_set = set(train_idx.tolist())
    test_idx = [k for k in range(n) if k not in train_set]

    #- Total number of seeds
    m = np.sum(n_seeds)

    #- estimate class probabilities
    pi_hats = n_seeds / m

    #- Sample from beta distributions
    Z = beta_sampler(counts, beta_params)

    #- Sample from multivariate normal or SBM either independently of Zs or otherwise
    if cond_ind:
        if sbm_check:
            A = sbm(counts, normal_params)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = MVN_sampler(counts, normal_params)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]
    else:
        if sbm_check:
            # A big version of B to be able to change connectivity
            # probabilities of individual nodes
            P = blowup(normal_params, counts)
            # Geometric mean of each node's Z values: would do just the
            # outer product, but if the Z's are too small we risk not being
            # connected.
            scales = np.prod(Z, axis=1) ** (1 / Z.shape[1])
            # `scales` is 1-D, so the n x n scaling needs an explicit outer
            # product (the old `scales @ scale.T` used an undefined name
            # and `@` on 1-D arrays yields a scalar).
            new_P = P * np.outer(scales, scales)  # new probability matrix
            A = sbm(np.ones(n).astype(int), new_P)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = conditional_MVN_sampler(Z=Z, rho=1, counts=counts,
                                        params=normal_params, seed=None)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]

    XZ = np.concatenate((X, Z), axis=1)

    #- Estimate normal parameters using seeds
    params = []
    for i in range(K):
        temp_mu, temp_cov = estimate_normal_parameters(X[class_train_idx[i]])
        params.append([temp_mu, temp_cov])

    #- Using conditional independence assumption (RF, KNN used for posterior estimates)
    if errors is None:
        # Six slots: indices 0..5 are written below (was 5 -> IndexError).
        errors = [[] for _ in range(6)]

    n_test = len(test_idx)

    rf1 = RF(n_estimators=100, max_depth=int(np.round(np.log(Z[train_idx].shape[0]))))
    rf1.fit(Z[train_idx], labels[train_idx])

    knn1 = KNN(n_neighbors=int(np.round(np.log(Z[train_idx].shape[0]))))
    knn1.fit(Z[train_idx], labels[train_idx])

    if smooth:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
        errors[0].append(temp_error)

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
        errors[1].append(temp_error)
    else:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
        errors[0].append(temp_error)

        knn1 = KNN(n_neighbors=int(np.round(np.log(m))))
        knn1.fit(Z[train_idx], labels[train_idx])

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
        errors[1].append(temp_error)

    temp_pred = QDA(X[test_idx], pi_hats, params)
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
    errors[2].append(temp_error)

    #- Not using conditional independence assumption (RF, KNN used for classification)
    rf2 = RF(n_estimators=100, max_depth=int(np.round(np.log(m))))
    rf2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = rf2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
    errors[3].append(temp_error)

    knn2 = KNN(n_neighbors=int(np.round(np.log(m))))
    knn2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = knn2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / n_test
    errors[4].append(temp_error)

    # NOTE(review): `adj` and `features` are not defined in this function;
    # they must exist at module scope for this call to work -- confirm.
    temp_accuracy = GCN(adj, features, train_idx, labels)
    temp_error = 1 - temp_accuracy
    errors[5].append(temp_error)

    return errors
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Compute the adjacency spectral embedding of a graph and save it to disk.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean
    representation of the graph based on its adjacency matrix. It relies on an
    SVD to reduce the dimensionality to the specified k, or if k is
    unspecified, can find a number of dimensions automatically. Here the
    embedder is used with its default settings on the output of
    ``get_lcc(mat)``.

    Two files are written to an ``embeddings`` directory located next to the
    parent directory of ``graph_path``: the embedding as ``.npy`` and the
    fitted estimator as ``.joblib``.

    Parameters
    ----------
    mat : ndarray or nx.Graph
        Graph to embed (passed through ``get_lcc`` first).
    atlas : str
        Atlas name, used in the output file names.
    graph_path : str
        Path of the graph file; determines the output directory.
    ID : str
        Subject identifier; the first element of ``flatten(ID)`` is used in
        the output file names.
    subgraph_name : str
        Sub-network name, used in the output file names.

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is
    the adjacency matrix of the graph. These basis vectors (in the matrices U
    or V) are ordered according to the amount of variance they explain in the
    original matrix. By selecting a subset of these basis vectors (through
    our choice of dimensionality reduction) we can find a lower dimensional
    space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E. "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel
      Graphs," Journal of the American Statistical Association,
      Vol. 107(499), 2012
    """
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"Embedding unimod asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    # exist_ok already covers the "directory is there" case.
    os.makedirs(namer_dir, exist_ok=True)

    # Resolve the subject id once; it is shared by both output names.
    subject_id = list(flatten(ID))[0]
    file_stem = f"{namer_dir}/{subject_id}_{atlas}_{subgraph_name}_asetome"
    out_path = f"{file_stem}.npy"
    out_path_est = f"{file_stem}_estimator.joblib"

    dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)

    return out_path
from graspy.simulations import er_np # Experiment parameters n_verts = 200 p = 0.5 n_components = 1 n_sims = 1000 # Run experiment estimated_latents = np.zeros((n_sims, 2)) for i in range(n_sims): graph = er_np(n_verts, p, directed=False, loops=False) ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True) ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False) diag_latent = ase_diag.fit_transform(graph) ase_latent = ase.fit_transform(graph) mean_diag_latent = np.mean(diag_latent) mean_latent = np.mean(ase_latent) estimated_latents[i, 0] = mean_diag_latent estimated_latents[i, 1] = mean_latent diffs = estimated_latents - np.sqrt(p) # the true latent position is sqrt(p) # Plot results
#%% %matplotlib inline from graspy.plot import * from graspy.simulations import sbm from graspy.embed import AdjacencySpectralEmbed import numpy as np import matplotlib.pyplot as plt import seaborn as sns g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]]) ase = AdjacencySpectralEmbed() X = ase.fit_transform(g) labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2] # pairplot(X, size=50, alpha=0.6) plt.show()
meta = mg.meta adj = mg.adj adj = pass_to_ranks(adj) meta["inds"] = range(len(meta)) left_inds = meta[meta["left"]]["inds"] right_inds = meta[meta["right"]]["inds"] lp_inds, rp_inds = get_paired_inds(meta) # %% [markdown] # ## Embed # Here the embedding is ASE, with PTR and DiagAug, the number of embedding dimensions # is for now set to ZG2 (4 + 4). Using the known pairs as "seeds", the left embedding # is matched to the right using procrustes. ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2) embed = ase.fit_transform(adj) n_components = embed[0].shape[1] # use all of ZG2 X = np.concatenate((embed[0][:, :n_components], embed[1][:, :n_components]), axis=-1) R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds]) if CLUSTER_SPLIT == "best": X[left_inds] = X[left_inds] @ R # %% [markdown] # ## Clustering # Clustering is performed using Gaussian mixture modeling. At each candidate value of k, # 50 models are trained on the left embedding, 50 models are trained on the right # embedding (choosing the best covariance structure based on BIC on the train set). results = crossval_cluster(
# ) #%% from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed from graspy.utils import pass_to_ranks from graspy.plot import pairplot sum_adj = np.sum(np.array(mb_color_graphs), axis=0) n_components = 4 # ptr_adj = pass_to_ranks(sum_adj) ase = AdjacencySpectralEmbed(n_components=n_components) sum_latent = ase.fit_transform(ptr_adj) sum_latent = np.concatenate(sum_latent, axis=-1) pairplot(sum_latent, labels=mb_class_labels) ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs] # graph_sum = [np.sum(a) for a in mb_color_graphs] # ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)] omni = OmnibusEmbed(n_components=n_components // 4) color_latent = omni.fit_transform(ptr_color_adjs) color_latent = np.concatenate(color_latent, axis=-1) color_latent = np.concatenate(color_latent, axis=-1) pairplot(color_latent, labels=mb_class_labels) from graspy.embed import MultipleASE
ax.set_yticklabels([name0, name1, name2, name3]) # %% [markdown] # ## matrixplot( path_indicator_mat[:50, :50], plot_type="scattermap", sizes=(0.2, 0.2), hue="weight", palette=sns.color_palette("husl", n_colors=10), ax=ax, ) # %% [markdown] # ## embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2) embed = embedder.fit_transform(adj) embed = np.concatenate(embed, axis=-1) pairplot(embed, labels=labels, palette="tab20") # %% [markdown] # ## Run paths print(f"Running {n_init} random walks from each source node...") transition_probs = to_markov_matrix(adj) out_inds = np.where(labels == n_blocks - 1)[0] source_inds = np.where(labels == 0)[0] def rw_from_node(s):
left_inds = meta[meta["left"]]["inds"] right_inds = meta[meta["right"]]["inds"] def remove_axis(ax): remove_spines(ax) ax.set_xlabel("") ax.set_ylabel("") ax.set_xticks([]) ax.set_yticks([]) method = "ortho" print("Embedding graph...") embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2) in_embed, out_embed = embedder.fit_transform(pass_to_ranks(adj)) procrust = Procrustes(method=method) # procrust.fit(in_embed, x_seeds=lp_inds, y_seeds=rp_inds) embed = np.concatenate((in_embed, out_embed), axis=-1) dim1 = 0 dim2 = 4 fig, axs = plt.subplots(2, 2, figsize=(20, 20)) plot_df = pd.DataFrame(data=embed[:, [0, 1]]) plot_df["merge_class"] = meta["merge_class"].values ax = axs[0, 0] sns.scatterplot( data=plot_df, x=0,