def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name='whole_brain'):
    """
    Omnibus-embed a population of graphs, reduce with classical MDS, and save.

    Graphs whose upper-triangular entries have (near-)zero variance are
    dropped before embedding. If embedding the reduced population fails, the
    full population is embedded instead.

    Parameters
    ----------
    pop_array : sequence of ndarray
        Adjacency matrices; each one is passed through ``np.triu`` and all are
        stacked, so they are assumed to share one square shape.
    atlas : str
        Atlas name, used in the progress message and the output file name.
    graph_path : str
        Path to a graph file. The output is written to an ``embeddings``
        folder located in the parent of this file's directory.
    ID : str or nested list of str
        Subject identifier(s); the first element after flattening is used in
        the output file name.
    subgraph_name : str
        Sub-network label, used in the message and the output file name.

    Returns
    -------
    out_path : str
        Path of the saved ``.npy`` file containing the MDS result.
    """
    from graspy.embed import OmnibusEmbed, ClassicalMDS

    # Each graph is treated as one "feature" (column) and each matrix entry as
    # one sample, so the selector keeps graphs whose entries actually vary.
    variance_threshold = VarianceThreshold(threshold=0.00001)
    upper_tris = np.array([np.triu(graph) for graph in pop_array])
    flattened = upper_tris.reshape(
        upper_tris.shape[0], upper_tris.shape[1] * upper_tris.shape[2])
    graphs_ix_keep = variance_threshold.fit(
        flattened.T).get_support(indices=True)
    pop_array_red = [pop_array[i] for i in graphs_ix_keep]

    # Omnibus embedding -- random dot product graph (rdpg)
    print('Embedding ensemble for atlas: %s and %s...' % (atlas, subgraph_name))
    omni = OmnibusEmbed(check_lcc=False)
    mds = ClassicalMDS()
    try:
        omni_fit = omni.fit_transform(pop_array_red)
    except Exception as err:
        # Best-effort fallback, but never swallow KeyboardInterrupt/SystemExit
        # and never fail silently.
        print('Embedding of variance-filtered graphs failed (%s); '
              'retrying with all graphs...' % err)
        omni_fit = omni.fit_transform(pop_array)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(omni_fit)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = dir_path + '/embeddings'
    os.makedirs(namer_dir, exist_ok=True)

    out_path = "%s/%s_omnetome_%s_%s.npy" % (
        namer_dir, list(flatten(ID))[0], atlas, subgraph_name)
    print('Saving...')
    np.save(out_path, mds_fit)
    return out_path
def omni_embed(pop_array):
    """
    Omnibus-embed a population of graphs, reduce with classical MDS, and save.

    Graphs whose upper-triangular entries have variance at or below 0.05 are
    dropped first. If embedding (or the MDS step) fails on the reduced
    population, the whole pipeline is retried on the full population.

    NOTE(review): ``atlas``, ``graph_path`` and ``ID`` are not parameters of
    this function; they are resolved from the enclosing/module scope at call
    time. Confirm that the caller defines them before invoking this function.

    Parameters
    ----------
    pop_array : sequence of ndarray
        Adjacency matrices; each is passed through ``np.triu`` and all are
        stacked, so they are assumed to share one square shape.

    Returns
    -------
    None
        The MDS result is written to
        ``<dirname(graph_path)>/<ID>_omnetome_<atlas>.npy``.
    """
    # Each graph is one "feature" (column); each matrix entry is one sample.
    variance_threshold = VarianceThreshold(threshold=0.05)
    upper_tris = np.array([np.triu(graph) for graph in pop_array])
    flattened = upper_tris.reshape(
        upper_tris.shape[0], upper_tris.shape[1] * upper_tris.shape[2])
    var_thr = variance_threshold.fit(flattened.T)
    graphs_ix_keep = var_thr.get_support(indices=True)
    pop_array_red = [pop_array[i] for i in graphs_ix_keep]

    # Omnibus embedding -- random dot product graph (rdpg)
    print('Embedding ensemble for atlas: %s...' % atlas)
    omni = OmnibusEmbed(check_lcc=False)

    def _embed(graphs):
        # Omnibus tensor -> dissimilarity feature via classical MDS.
        return ClassicalMDS().fit_transform(omni.fit_transform(graphs))

    try:
        mds_fit = _embed(pop_array_red)
    except Exception as err:
        # Best-effort fallback, but never swallow KeyboardInterrupt/SystemExit
        # and never fail silently.
        print('Embedding of variance-filtered graphs failed (%s); '
              'retrying with all graphs...' % err)
        mds_fit = _embed(pop_array)

    dir_path = os.path.dirname(graph_path)
    out_path = "%s/%s_omnetome_%s.npy" % (
        dir_path, list(flatten(ID))[0], atlas)
    print('Saving...')
    np.save(out_path, mds_fit)
    return
def __init__(self, learning_method, memory=None, verbose=False, plot_method=None, kfold=KFold(n_splits=4), flat_method=Flat):
    """
    Build the pipeline, making sure it starts with a ClassicalMDS stage.

    Parameters
    ----------
    learning_method : list
        Forwarded to the parent constructor as ``steps``. Presumably a list
        of ``(name, estimator)`` pairs, since ``self.steps[0][1]`` is
        inspected below -- confirm against the parent class.
    memory, verbose :
        Forwarded unchanged to the parent constructor.
    plot_method : optional
        Forwarded to the parent constructor; stored on the instance when it
        is not None.
    kfold : optional
        Forwarded to the parent constructor; stored on the instance when it
        is not None.
    flat_method : callable
        Wrapped in a ``FunctionTransformer`` and inserted right after the MDS
        stage when that stage has to be added.
    """
    super(MDSPipeline, self).__init__(
        steps=learning_method,
        memory=memory,
        verbose=verbose,
        plot_method=plot_method,
        kfold=kfold,
    )
    self.flat_method = flat_method

    # Prepend an MDS + flattening stage unless the caller already put a
    # ClassicalMDS estimator first.
    leading_estimator = self.steps[0][1]
    if not isinstance(leading_estimator, ClassicalMDS):
        mds_stage = [
            ('MDS', ClassicalMDS()),
            ('Flat', FunctionTransformer(self.flat_method, validate=False)),
        ]
        self.steps = mds_stage + self.steps

    if plot_method is not None:
        self.plot_method = plot_method

    if kfold is not None:
        self.kfold = kfold
# indicator = np.full(len(gm.positions_), i) # all_positions += gm.positions_ # init_indicator.append(indicator) init_indicator.append(["Barycenter"]) init_indicator.append(["Truth"]) init_indicator = np.concatenate(init_indicator) # init_indicator = np.array(init_indicator) all_positions.append(np.full(A1.shape, 1 / A1.size)) all_positions.append(P.T) all_positions = np.array(all_positions) all_positions = all_positions.reshape((len(all_positions), -1)) position_pdist = pairwise_distances(all_positions, metric="euclidean") cmds = ClassicalMDS(n_components=2, dissimilarity="euclidean") all_X = cmds.fit_transform(all_positions) all_X -= all_X[-1] # remove_rand = False # if remove_rand: # X = all_X[n_rand:] # init_indicator = init_indicator[n_rand:] # else: X = all_X plot_df = pd.DataFrame(data=X) plot_df["init"] = init_indicator sns.set_context("talk") fig, ax = plt.subplots(1, 1, figsize=(10, 10)) # sns.scatterplot(data=plot_df[plot_df["init"] == "Random"], x=0, y=1, ax=ax)
# metaheatmap(path_mat, meta, sortby_classes=["class_rank"], sortby_nodes=["mean_rank"]) # %% [markdown] # # from sklearn.manifold import MDS path_mat = path_mat.tocsr() # for fast mult print("Finding pairwise jaccard distances") pdist_sparse = pairwise_sparse_jaccard_distance(path_mat) print(pdist_sparse.shape) print("Embedding with MDS") mds = ClassicalMDS(dissimilarity="precomputed") # mds = MDS(dissimilarity="precomputed", n_components=6, n_init=16, n_jobs=-2) jaccard_embedding = mds.fit_transform(pdist_sparse) # %% [markdown] # # print("Clustering embedding") agmm = AutoGMMCluster(min_components=10, max_components=40, affinity="euclidean", linkage="single") labels = agmm.fit_predict(jaccard_embedding) pairplot(jaccard_embedding, title="AGMM o CMDS o Jaccard o Sensorimotor Paths",
def _plot_embedding_panel(ax, plot_df, title, plot_kws, lp_inds, rp_inds):
    """Scatter columns 0/1 of ``plot_df`` on ``ax`` and link left/right pairs."""
    sns.scatterplot(data=plot_df, ax=ax, **plot_kws)
    ax.axis("off")
    # Positional indexing with None raises, so only draw pair connections
    # when both index sets were actually supplied.
    if lp_inds is not None and rp_inds is not None:
        add_connections(
            plot_df.iloc[lp_inds, 0],
            plot_df.iloc[rp_inds, 0],
            plot_df.iloc[lp_inds, 1],
            plot_df.iloc[rp_inds, 1],
            ax=ax,
        )
    ax.set_title(title)


def quick_embed_viewer(embed, labels=None, lp_inds=None, rp_inds=None, left_right_indexing=False):
    """
    Plot six 2-D views of an embedding on a 3x2 grid of axes.

    Rows are classical MDS, t-SNE and UMAP; the left column uses Euclidean
    distance and the right column cosine distance. Each panel is a scatter
    plot coloured by ``labels`` with lines connecting left/right pairs.

    Parameters
    ----------
    embed : array-like
        Points to visualise; passed to each reducer's ``fit_transform`` and
        to ``pairwise_distances``.
    labels : array-like, optional
        Per-point labels stored in the ``"labels"`` column and used as the
        hue, coloured with ``CLASS_COLOR_DICT``.
    lp_inds, rp_inds : array-like of int, optional
        Positional indices of the left and right member of each pair. When
        either is None (and ``left_right_indexing`` is False) no connections
        are drawn.
    left_right_indexing : bool
        When True, ``lp_inds``/``rp_inds`` are overridden: the first
        ``len(embed) // 2`` points are treated as the left members and the
        next ``len(embed) // 2`` as their right partners.

    Returns
    -------
    None
    """
    if left_right_indexing:
        half = len(embed) // 2
        lp_inds = np.arange(half)
        rp_inds = np.arange(half) + half

    fig, axs = plt.subplots(3, 2, figsize=(20, 30))

    plot_kws = dict(
        x=0,
        y=1,
        hue="labels",
        palette=CLASS_COLOR_DICT,
        legend=False,
        s=20,
        linewidth=0.5,
        alpha=0.7,
    )

    # Cosine distance matrix shared by the "precomputed" panels.
    pdist = symmetrize(pairwise_distances(embed, metric="cosine"))

    # (title, thunk) pairs in row-major axis order; thunks keep every
    # reducer lazy so they run in the same order as the panels are drawn.
    panels = [
        (
            "CMDS o euclidean",
            lambda: ClassicalMDS(n_components=2).fit_transform(embed),
        ),
        (
            "CMDS o cosine",
            lambda: ClassicalMDS(
                n_components=2, dissimilarity="precomputed"
            ).fit_transform(pdist),
        ),
        (
            "TSNE o euclidean",
            lambda: TSNE(metric="euclidean").fit_transform(embed),
        ),
        (
            "TSNE o cosine",
            lambda: TSNE(metric="precomputed").fit_transform(pdist),
        ),
        (
            "UMAP o euclidean",
            lambda: UMAP(
                metric="euclidean", n_neighbors=30, min_dist=1
            ).fit_transform(embed),
        ),
        (
            "UMAP o cosine",
            lambda: UMAP(
                metric="cosine", n_neighbors=30, min_dist=1
            ).fit_transform(embed),
        ),
    ]

    plot_df = None
    for ax, (title, compute_coords) in zip(axs.flat, panels):
        coords = compute_coords()
        if plot_df is None:
            # First panel creates the frame; later panels overwrite the
            # coordinate columns and reuse the labels column.
            plot_df = pd.DataFrame(data=coords)
            plot_df["labels"] = labels
        else:
            plot_df[0] = coords[:, 0]
            plot_df[1] = coords[:, 1]
        _plot_embedding_panel(ax, plot_df, title, plot_kws, lp_inds, rp_inds)
def _omni_embed(pop_array, atlas, graph_path, ID, subgraph_name="whole_brain"):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted)
    adjacency matrices of a collection :math:`m` undirected graphs with
    matched vertices. Then the :math:`(mn \times mn)` omnibus matrix,
    :math:`M`, has the subgraph where :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`.
    The omnibus matrix is then embedded using adjacency spectral embedding.
    The resulting tensor is reduced with classical MDS, and both fitted
    estimators are persisted next to the embedding with joblib.

    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices,
        n_vertices). If ndarray, then array must have shape (n_graphs,
        n_vertices, n_vertices).
    atlas : str
        Atlas name, used in the progress message and the output file names.
    graph_path : str
        Path to a graph file. Outputs are written to an ``embeddings`` folder
        located in the parent of this file's directory.
    ID : str
        Subject identifier; the first element after flattening is used in the
        output file names.
    subgraph_name : str
        Sub-network label, used in the message and the output file names.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E.
      (2017, November). A central limit theorem for an omnibus embedding of
      multiple random dot product graphs. In Data Mining Workshops (ICDMW),
      2017 IEEE International Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K.,
      Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in
      python. Journal of Machine Learning Research.

    """
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from joblib import dump

    # Omnibus embedding
    print(
        f"Embedding unimodal omnetome for atlas: {atlas} and "
        f"{subgraph_name}..."
    )
    omni = OmnibusEmbed(check_lcc=False)
    mds = ClassicalMDS()
    omni_fit = omni.fit_transform(pop_array)

    # Transform omnibus tensor into dissimilarity feature
    mds_fit = mds.fit_transform(omni_fit)

    dir_path = str(Path(os.path.dirname(graph_path)).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    stem = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}"
    out_path = f"{stem}_omnetome.npy"
    out_path_est_omni = f"{stem}_masetome_estimator_omni.joblib"
    out_path_est_mds = f"{stem}_masetome_estimator_mds.joblib"

    # Persist each fitted estimator under its own name. The MDS file must
    # hold the MDS estimator, not a second copy of the omnibus one.
    dump(omni, out_path_est_omni)
    dump(mds, out_path_est_mds)

    print("Saving...")
    np.save(out_path, mds_fit)
    return out_path
# NOTE(review): this fragment begins inside a call whose opening line is
# outside the view; the keyword arguments below belong to that call.
    figsize=(20, 20),
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("agglomerative-path-dist-mat" + basename)

# %% [markdown]
# ##
from graspy.embed import select_dimension

print("Running CMDS on path dissimilarity...")
X = path_dist_mat
# Embed into ceil(log2(min(X.shape))) dimensions, treating X as a precomputed
# dissimilarity matrix.
cmds = ClassicalMDS(dissimilarity="precomputed", n_components=int(np.ceil(np.log2(np.min(X.shape)))))
path_embed = cmds.fit_transform(X)
# Pick up to three elbows from the singular-value spectrum.
elbows, elbow_vals = select_dimension(cmds.singular_values_, n_elbows=3)
# 1-based component index for the x-axis of the scree plot.
rng = np.arange(1, len(cmds.singular_values_) + 1)
elbows = np.array(elbows)
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
pc = ax.scatter(elbows, elbow_vals, color="red", label="ZG")
# Draw the elbow markers on top of the singular-value line.
pc.set_zorder(10)
ax.plot(rng, cmds.singular_values_, "o-")
ax.legend()
stashfig("cmds-screeplot" + basename)

# %% [markdown]
# ##
pairplot(path_embed, alpha=0.02)
def _omni_embed(pop_array, atlas, graph_path_list, ID, subgraph_name="all_nodes", n_components=None, norm=1):
    r"""
    Omnibus embedding of arbitrary number of input graphs with matched vertex
    sets.

    Given :math:`A_1, A_2, ..., A_m` a collection of (possibly weighted)
    adjacency matrices of a collection :math:`m` undirected graphs with
    matched vertices. Then the :math:`(mn \times mn)` omnibus matrix,
    :math:`M`, has the subgraph where :math:`M_{ij} = \frac{1}{2}(A_i + A_j)`.
    The omnibus matrix is then embedded using adjacency spectral embedding.

    Parameters
    ----------
    pop_array : list of nx.Graph or ndarray, or ndarray
        If list of nx.Graph, each Graph must contain same number of nodes.
        If list of ndarray, each array must have shape (n_vertices,
        n_vertices). If ndarray, then array must have shape (n_graphs,
        n_vertices, n_vertices).
    atlas : str
        The name of an atlas (indicating the node definition).
    graph_path_list : list
        List of file paths to graphs in pop_array, in the same order.
    ID : str
        An arbitrary subject identifier. Not used in the output file name.
    subgraph_name : str
        Sub-network label, used in the message and the output file name.
    n_components : int, optional
        Forwarded to both ``OmnibusEmbed`` and ``ClassicalMDS``.
    norm : int or float
        When ``float(norm) >= 1`` each graph is re-loaded from its path and
        normalized through ``CleanGraphs.normalize_graph`` before embedding;
        otherwise the corresponding entry of ``pop_array`` is used as is.

    Returns
    -------
    out_path : str
        File path to .npy file containing omni embedding tensor.

    References
    ----------
    .. [1] Levin, K., Athreya, A., Tang, M., Lyzinski, V., & Priebe, C. E.
      (2017, November). A central limit theorem for an omnibus embedding of
      multiple random dot product graphs. In Data Mining Workshops (ICDMW),
      2017 IEEE International Conference on (pp. 964-967). IEEE.
    .. [2] Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K.,
      Helm, H. S., & Vogelstein, J. T. (2019). Graspy: Graph statistics in
      python. Journal of Machine Learning Research.

    """
    import networkx as nx
    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import OmnibusEmbed, ClassicalMDS
    from pynets.stats.netstats import CleanGraphs

    clean_mats = []
    for i, graph_path in enumerate(graph_path_list):
        # NOTE(review): positional arguments kept exactly as before; their
        # meaning is defined by CleanGraphs, which is outside this view.
        cg = CleanGraphs(None, None, graph_path, 0, norm)
        if float(norm) >= 1:
            G = cg.normalize_graph()
            mat_clean = nx.to_numpy_array(G)
        else:
            mat_clean = pop_array[i]
        clean_mats.append(mat_clean)

    # Omnibus embedding
    print(f"Embedding unimodal omnetome for atlas: {atlas} and "
          f"{subgraph_name}...")
    omni = OmnibusEmbed(n_components=n_components, check_lcc=False)
    mds = ClassicalMDS(n_components=n_components)
    # Embed the cleaned matrices. Fitting on the raw pop_array here would
    # throw away the normalization performed in the loop above.
    omni_fit = omni.fit_transform(clean_mats)

    # Transform omnibus tensor into dissimilarity feature
    # NOTE(review): this is a reshape, not an axis transpose, so elements are
    # re-chunked in memory order -- confirm this is the intended layout.
    mds_fit = mds.fit_transform(
        omni_fit.reshape(omni_fit.shape[1], omni_fit.shape[2],
                         omni_fit.shape[0]))

    dir_path = str(Path(os.path.dirname(graph_path_list[0])).parent)
    namer_dir = f"{dir_path}/embeddings"
    os.makedirs(namer_dir, exist_ok=True)

    out_path = (
        f"{namer_dir}/gradient-OMNI_{atlas}_{subgraph_name}_"
        f"{os.path.basename(graph_path_list[0]).split('_thrtype')[0]}.npy")

    print("Saving...")
    np.save(out_path, mds_fit)
    return out_path
# %% [markdown] # ## Decide on an embedding method for distance matrix dim_reduce = "cmds" basename += f"-dim_red={dim_reduce}" # %% [markdown] # ## print("Running dimensionality reduction on path dissimilarity...") X = path_dist_mat max_dim = int(np.ceil(np.log2(np.min(X.shape)))) if dim_reduce == "cmds": cmds = ClassicalMDS(dissimilarity="precomputed", n_components=max_dim) path_embed = cmds.fit_transform(X) sing_vals = cmds.singular_values_ elif dim_reduce == "iso": iso = Isomap(n_components=max_dim, metric="precomputed") path_embed = iso.fit_transform(X) sing_vals = iso.kernel_pca_.lambdas_ elif dim_reduce == "tsne": best_embed = None best_kl = np.inf n_tsne = 10 for i in range(n_tsne): manifold = TSNE(metric="precomputed") path_embed = manifold.fit_transform(X) kl = manifold.kl_divergence_ print(kl)
# NOTE(review): the indented lines below are the tail of a function whose
# `def` line (and the definition of `intersect`) is outside this view.
    # Row sums of each input, flattened to 1-D.
    x_sum = X.sum(axis=1).A1
    y_sum = Y.sum(axis=1).A1
    xx, yy = np.meshgrid(x_sum, y_sum)
    # union[i, j] = x_sum[i] + y_sum[j] - intersect[i, j]
    union = (xx + yy).T - intersect
    # Returned as a plain ndarray via `.A`.
    return (1 - intersect / union).A


# Settings that identify which saved run's CSV outputs to load.
run_name = "86.1-BDP-prob-path-cluster"
threshold = 1
weight = "weight"
graph_type = "Gad"
cutoff = 8

# File-name suffix shared by the CSVs read below.
base = f"-c{cutoff}-t{threshold}-{graph_type}"

base_path = Path(f"./maggot_models/notebooks/outs/{run_name}/csvs")
meta = pd.read_csv(base_path / str("meta" + base + ".csv"), index_col=0)
path_mat = pd.read_csv(base_path / str("prob-path-mat" + base + ".csv"), index_col=0)

sparse_path = csr_matrix(path_mat.values)

# Pairwise Euclidean distances between rows, embedded as a precomputed
# dissimilarity matrix.
euclid_dists = pairwise_distances(sparse_path, metric="euclidean")

mds = ClassicalMDS(dissimilarity="precomputed")

mds_embed = mds.fit_transform(euclid_dists)
embed_df = pd.DataFrame(data=mds_embed)
stashcsv(embed_df, "euclid-mds-embed")
#%% import pandas as pd import numpy as np from graspy.embed import ClassicalMDS import seaborn as sns from sklearn.metrics import pairwise_distances data_loc = "maggot_models/data/external/17-08-26L6-allC-cl.csv" ts_df = pd.read_csv(data_loc, index_col=None) ts_mat = ts_df.values.T # %% [markdown] # # corr_mat = pairwise_distances(ts_mat, metric="correlation") # %% [markdown] # # sns.clustermap(corr_mat) # %% [markdown] # # from graspy.plot import pairplot mds = ClassicalMDS(dissimilarity="precomputed") embed = mds.fit_transform(corr_mat) pairplot(embed)
# %% [markdown] # ## plot_pairs( U, labels=meta["merge_class"].values, left_pair_inds=lp_inds, right_pair_inds=rp_inds, ) # %% [markdown] # ## from graspy.utils import symmetrize # manifold = TSNE(metric="cosine") # tsne_embed = tsne.fit_transform(U) manifold = ClassicalMDS(n_components=U.shape[1] - 1, dissimilarity="precomputed") # manifold = MDS(n_components=2, dissimilarity="precomputed") # manifold = Isomap(n_components=2, metric="precomputed") pdist = symmetrize(pairwise_distances(U, metric="cosine")) manifold_embed = manifold.fit_transform(pdist) plot_pairs( manifold_embed, labels=meta["merge_class"].values, left_pair_inds=lp_inds, right_pair_inds=rp_inds, ) # %% [markdown] # ##
# NOTE(review): `path_dist_mat` and the linkage `Z` are defined before this
# view begins.
# The same linkage is used for rows and columns of the heatmap.
sns.clustermap(
    path_dist_mat,
    figsize=(20, 20),
    row_linkage=Z,
    col_linkage=Z,
    xticklabels=False,
    yticklabels=False,
)
stashfig("clustermap")

# %% [markdown]
# ##
from graspy.embed import ClassicalMDS
# NOTE(review): `screeplot` is not used in the visible part of this fragment.
from src.visualization import screeplot

# Embed the distance matrix as a precomputed dissimilarity in 10 dimensions
# and plot the resulting singular values.
cmds = ClassicalMDS(dissimilarity="precomputed", n_components=10)
path_embed = cmds.fit_transform(path_dist_mat)

plt.plot(cmds.singular_values_, "o")

# %% [markdown]
# ##
from graspy.plot import pairplot

# Only the first n_components embedding dimensions are shown.
n_components = 5
pairplot(path_embed[:, :n_components], alpha=0.1)

# %% [markdown]
# ##
from graspy.cluster import AutoGMMCluster
#%% n_components = 8 ase = AdjacencySpectralEmbed(n_components=n_components) latent = ase.fit_transform(graph) pairplot(latent, labels=block_labels) latent /= np.linalg.norm(latent, axis=1)[:, np.newaxis] pairplot(latent, labels=block_labels) # def compute_cosine_similarity(latent): # for i in range(latent.shape[0]) similarity = latent @ latent.T dissimilarity = 1 - similarity print(dissimilarity[0, 0]) cmds = ClassicalMDS(n_components=n_components - 1, dissimilarity="precomputed") cmds_latent = cmds.fit_transform(dissimilarity) pairplot(cmds_latent, labels=block_labels) #%% hsbm = HSBMEstimator( n_subgraphs=8, n_subgroups=3, n_components_lvl1=8, n_components_lvl2=3 ) hsbm.fit(graph) #%% plt.style.use("seaborn-white") model = hsbm.agglomerative_model_ dists = hsbm.subgraph_dissimilarities_ dists = dists - dists.min() c = Colormap() cmap = c.get_cmap_heat_r()