col_meta=col_df, col_sort_class=["from_class"], row_colors=CLASS_COLOR_DICT, row_meta=row_df, row_sort_class=["to_class"], plot_type="heatmap", sizes=(0.5, 0.5), tick_rot=45, ) stashfig("log-full-heat" + basename) # %% [markdown] # # Screeplots if plot_embed: screeplot(hist_mat.astype(float), title="Raw hist mat (full)") stashfig("scree-raw-mat" + basename) screeplot(log_mat, title="Log hist mat (full)") stashfig("scree-log-mat" + basename) # %% [markdown] # # Pairplots if plot_embed: pca = PCA(n_components=6) embed = pca.fit_transform(log_mat) loadings = pca.components_.T pg = pairplot( embed, labels=to_class.values, palette=CLASS_COLOR_DICT, height=5,
# sym_adj, class_labels, side_labels = preprocess_graph(
#     sym_adj, class_labels, side_labels
# )
# n_verts = sym_adj.shape[0]
# print(f"Removed {old_n_verts - n_verts} nodes")

# %% [markdown]
# # Embedding
n_verts = mg.n_verts
sym_adj = mg.adj
side_labels = mg["Hemisphere"]
class_labels = mg["Merge Class"]

latent, laplacian = lse(sym_adj, N_COMPONENTS, regularizer=None, ptr=PTR)
# The embedding is treated as two equal-width halves stacked column-wise.
latent_dim = latent.shape[1] // 2
screeplot(
    laplacian,
    title=f"Laplacian scree plot, R-DAD (ZG2 = {latent_dim} + {latent_dim})",
)
print(f"ZG chose dimension {latent_dim} + {latent_dim}")

# Show only the first three columns of each half in the pairplot.
plot_latent = np.concatenate(
    (latent[:, :3], latent[:, latent_dim:latent_dim + 3]),
    axis=-1,
)
pairplot(plot_latent, labels=side_labels)

# Average the two members of each pair (rows [0, n_pairs) with rows
# [n_pairs, 2 * n_pairs)), then append the remaining unpaired rows unchanged.
sym_latent = (latent[:n_pairs] + latent[n_pairs:2 * n_pairs]) / 2
sym_latent = np.concatenate((sym_latent, latent[2 * n_pairs:]))
latent = sym_latent

# Relabel: every averaged pair gets "P"; unpaired rows keep their side label.
side_labels = np.concatenate((["P"] * n_pairs, side_labels[2 * n_pairs:]))
# This assumes the class labels match perfectly between left and right,
# which is probably not the case.
col_meta=meta, col_colors="merge_class", col_sort_class=["merge_class"], col_palette=CLASS_COLOR_DICT, col_ticks=False, col_class_order="group_mean_visit", col_item_order="mean_visit", ) stashfig("log_hop_hist") # %% [markdown] # ## Scree plots from src.visualization import screeplot screeplot(all_hop_hist, show_first=40) stashfig("scree-first-40") screeplot(all_hop_hist, show_first=None) stashfig("scree-all") screeplot(np.log10(all_hop_hist + 1), show_first=100) screeplot(np.log10(all_hop_hist + 1), show_first=100, cumulative=True) # %% [markdown] # ## from graspy.cluster import AutoGMMCluster agmm = AutoGMMCluster( min_components=2, max_components=50, affinity=["euclidean", "manhattan"],
embed[left_inds] = embed[left_inds] @ R # %% [markdown] # ## joint = np.concatenate((embed, full_hop_hist.T), axis=1) from graspy.plot import pairplot from sklearn.decomposition import PCA from sklearn.decomposition import TruncatedSVD from sklearn.preprocessing import StandardScaler from src.visualization import screeplot joint = StandardScaler(with_mean=False, with_std=True).fit_transform(joint) screeplot(joint) embedder = TruncatedSVD(n_components=4) joint_embed = embedder.fit_transform(joint) pg = pairplot(joint_embed, labels=meta["merge_class"].values, palette=CLASS_COLOR_DICT) pg._legend.remove() # %% meta["inds"] = range(len(meta)) left_inds = meta[meta["left"]]["inds"] right_inds = meta[meta["right"]]["inds"] lp_inds, rp_inds = get_paired_inds(meta) results = crossval_cluster(
) stashfig("edgesum-scatter") edgesum_mat = calc_edgesums(color_adjs) in_cols = ["In " + n for n in GRAPH_TYPES] out_cols = ["Out " + n for n in GRAPH_TYPES] cols = np.array(in_cols + out_cols) edgesum_df = pd.DataFrame(data=edgesum_mat, columns=cols) figsize = (20, 20) sns.clustermap(edgesum_df, figsize=figsize) plt.title("Edgesum matrix, single linkage euclidean dendrograms", loc="center") screeplot(edgesum_mat, cumulative=False, title="Edgesum matrix screeplot") plt.ylim((0, 0.5)) pca = PCA(n_components=3) edgesum_pcs = pca.fit_transform(edgesum_mat) var_exp = np.sum(pca.explained_variance_ratio_) pairplot( edgesum_pcs, height=5, alpha=0.3, title=f"Edgesum PCs, {var_exp} variance explained" ) pairplot( edgesum_pcs, labels=class_labels, height=5, alpha=0.3,