def compute_mse_from_assignments(assignments, graph, directed=True, loops=False):
    """Fit an SBM with fixed block assignments and return its MSE on ``graph``.

    Parameters
    ----------
    assignments
        Per-vertex block labels, passed to ``SBMEstimator.fit`` as ``y``.
    graph
        Adjacency matrix the model is both fit to and evaluated on.
    directed : bool, optional
        Forwarded to ``SBMEstimator``; by default True.
    loops : bool, optional
        Forwarded to ``SBMEstimator``; by default False.

    Returns
    -------
    Whatever ``compute_mse`` returns for the fitted model and ``graph``.
    """
    model = SBMEstimator(directed=directed, loops=loops)
    model.fit(graph, y=assignments)
    mse = compute_mse(model, graph)
    return mse
def dcsbm_objective(adj, labels):
    """Score a block model fit to ``adj`` with the given vertex ``labels``.

    Parameters
    ----------
    adj
        Adjacency matrix to fit and score.
    labels
        Per-vertex block labels, passed to ``fit`` as ``y``.

    Returns
    -------
    The value of ``score(adj)`` for the fitted estimator.
    """
    # NOTE(review): despite the function name, a plain ``SBMEstimator`` with
    # default arguments is fit here, not a degree-corrected model -- confirm
    # this is intended.
    model = SBMEstimator()
    model.fit(adj, y=labels)
    return model.score(adj)
def _fit_and_score_on_other(train_graph, train_labels, test_graph, test_labels):
    """Fit an a priori SBM on one graph and summarise how it predicts another.

    Parameters
    ----------
    train_graph, train_labels
        Graph and vertex labels the SBM is fit on.
    test_graph, test_labels
        Graph and vertex labels the fitted SBM is evaluated against.

    Returns
    -------
    pandas.DataFrame
        Single-row frame with columns ``n_params``, ``mse``, ``likelihood``,
        ``zc_likelihood`` and ``sc_likelihood``.
    """
    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(train_graph, y=train_labels)

    mse = mse_on_other(estimator, test_graph, test_labels)
    likelihood = likelihood_on_other(estimator, test_graph, test_labels)
    # Clip value is 1 / (number of entries minus the first-axis length), i.e.
    # one over the off-diagonal entry count when the graph is square.
    sc_likelihood = likelihood_on_other(
        estimator,
        test_graph,
        test_labels,
        clip=1 / (test_graph.size - test_graph.shape[0]),
    )

    summary = {
        "n_params": estimator._n_parameters(),
        "mse": mse,
        "likelihood": likelihood,
        # NOTE(review): "zc_likelihood" repeats the unclipped likelihood
        # exactly as the original code did -- confirm this is intended.
        "zc_likelihood": likelihood,
        "sc_likelihood": sc_likelihood,
    }
    return pd.DataFrame(summary, index=[0])


def run_fit(seed):
    """Cross-predict the left and right graphs with a priori SBMs.

    An SBM is fit on the left graph and scored on the right, then the reverse.
    Each single-row result frame is printed and saved through ``save_obj``
    under the names ``"right_pred_sbm_df"`` and ``"left_pred_sbm_df"``.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed`` before any work is done.

    Returns
    -------
    int
        Always 0.
    """
    np.random.seed(seed)

    # load
    left_graph, left_labels = load_left()
    right_graph, right_labels = load_right()

    # fit SBM left, predict right
    right_pred_df = _fit_and_score_on_other(
        left_graph, left_labels, right_graph, right_labels
    )
    print(right_pred_df)
    save_obj(right_pred_df, file_obs, "right_pred_sbm_df")

    # fit SBM right, predict left
    left_pred_df = _fit_and_score_on_other(
        right_graph, right_labels, left_graph, left_labels
    )
    print(left_pred_df)
    save_obj(left_pred_df, file_obs, "left_pred_sbm_df")

    return 0
def probplot(
    adj,
    labels,
    log_scale=False,
    figsize=(20, 20),
    cmap="Purples",
    title="Edge probability",
    vmin=0,
    vmax=None,
    ax=None,
    font_scale=1,
):
    """Draw an annotated heatmap of SBM block probabilities.

    An ``SBMEstimator(directed=True, loops=True)`` is fit to the binarized
    adjacency matrix with ``labels`` as block assignments, and its
    ``block_p_`` matrix is plotted with seaborn.

    Parameters
    ----------
    adj
        Adjacency matrix; binarized before fitting.
    labels
        Per-vertex block labels.
    log_scale : bool, optional
        If True, 0.001 is added to every block probability, the colour scale
        becomes logarithmic, and ``vmin``/``vmax`` are overwritten with the
        shifted data range.
    figsize : tuple, optional
        Size of the figure created when ``ax`` is None.
    cmap : str, optional
        Colormap handed to ``sns.heatmap``.
    title : str, optional
        Axes title.
    vmin, vmax : optional
        Colour limits (ignored when ``log_scale`` is True).
    ax : optional
        Axes to draw on; a new figure is created when None.
    font_scale : optional
        Passed to ``sns.set_context("talk", ...)``.

    Returns
    -------
    tuple
        ``(ax, prob_df)`` -- the heatmap axes and the plotted DataFrame,
        indexed by ``np.unique(labels)`` on both axes.
    """
    estimator = SBMEstimator(directed=True, loops=True)
    estimator.fit(binarize(adj), y=labels)
    block_probs = estimator.block_p_
    unique_labels = np.unique(labels)

    colorbar_kws = {"fraction": 0.08, "shrink": 0.8, "pad": 0.03}

    if log_scale:
        # Shift away from zero so the log norm is defined everywhere.
        block_probs = block_probs + 0.001
        vmin = block_probs.min().min()
        vmax = block_probs.max().max()
        log_norm = LogNorm(vmin=vmin, vmax=vmax)
        # One colourbar tick per power of ten spanning the data range.
        lowest_exp = math.floor(math.log10(vmin))
        highest_exp = math.ceil(math.log10(vmax))
        colorbar_kws["ticks"] = [
            math.pow(10, exponent) for exponent in range(lowest_exp, 1 + highest_exp)
        ]

    prob_df = pd.DataFrame(data=block_probs, index=unique_labels, columns=unique_labels)

    if ax is None:
        plt.figure(figsize=figsize)
        ax = plt.gca()
    ax.set_title(title, pad=30, fontsize=30)

    sns.set_context("talk", font_scale=font_scale)

    heatmap_kws = dict(
        cbar_kws=colorbar_kws,
        annot=True,
        square=True,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
    )
    if log_scale:
        heatmap_kws["norm"] = log_norm
    # ``ax`` is guaranteed to exist at this point (created above if needed).
    heatmap_kws["ax"] = ax

    ax.tick_params(axis="both", which="major", labelsize=30)
    ax = sns.heatmap(prob_df, **heatmap_kws)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    return ax, prob_df
def test_SBM_nparams(self):
    """Check ``_n_parameters()`` for supervised, unsupervised and undirected fits."""
    # Labels supplied: expected count is 4.
    supervised = self.estimator.fit(self.graph, y=self.labels)
    assert supervised._n_parameters() == (4)

    # No labels supplied, default estimator: expected count is 4 + 1.
    # (presumably the extra parameter accounts for estimated assignments --
    # confirm against the estimator implementation)
    unsupervised = SBMEstimator()
    unsupervised.fit(self.graph)
    assert unsupervised._n_parameters() == (4 + 1)

    # No labels supplied, undirected estimator: expected count is 1 + 3.
    undirected = SBMEstimator(directed=False)
    undirected.fit(self.graph)
    assert undirected._n_parameters() == (1 + 3)
def get_sbm_prob(adj, labels):
    """Return SBM block probabilities as a DataFrame ordered by block size.

    An ``SBMEstimator(directed=True, loops=True)`` is fit to the binarized
    adjacency matrix using ``labels`` as block assignments. Rows and columns
    of the returned frame are ordered by descending label count.

    Parameters
    ----------
    adj
        Adjacency matrix; binarized before fitting.
    labels
        Per-vertex block labels.

    Returns
    -------
    pandas.DataFrame
        Reordered ``block_p_`` matrix, indexed by label on both axes.
    """
    estimator = SBMEstimator(directed=True, loops=True)
    estimator.fit(binarize(adj), y=labels)
    block_probs = estimator.block_p_

    # NOTE(review): assumes ``block_p_`` rows/columns follow the sorted order
    # returned by ``np.unique(labels)`` -- confirm against the estimator.
    unique_labels, label_counts = np.unique(labels, return_counts=True)
    largest_first = np.argsort(label_counts)[::-1]

    ordered_labels = unique_labels[largest_first]
    ordered_probs = block_probs[np.ix_(largest_first, largest_first)]
    return pd.DataFrame(data=ordered_probs, index=ordered_labels, columns=ordered_labels)
def test_SBM_score(self):
    """Exercise ``score()`` / ``score_samples()`` on a two-block directed graph."""
    block_size = 100
    n_total = block_size * 2
    block_p = np.array([[0.75, 0.25], [0.25, 0.75]])
    block_sizes = np.array([block_size, block_size])

    vertex_labels = _n_to_labels(block_sizes)
    p_mat = _block_to_full(block_p, vertex_labels, shape=(n_total, n_total))
    graph = sample_edges(p_mat, directed=True)

    estimator = SBMEstimator(max_comm=4)
    _test_score(estimator, p_mat, graph)

    # Scoring a sub-graph taken from a different slice must be rejected.
    with pytest.raises(ValueError):
        estimator.score_samples(graph=graph[1:100, 1:100])
def test_SBM_fit_unsupervised(self):
    """Fit without labels and check recovered assignments and probabilities."""
    np.random.seed(12345)

    n_verts = 1500
    block_p = np.array([[0.7, 0.1, 0.1], [0.1, 0.9, 0.1], [0.05, 0.1, 0.75]])
    block_sizes = np.array([500, 500, 500])
    true_labels = _n_to_labels(block_sizes)

    true_p_mat = _block_to_full(block_p, true_labels, (n_verts, n_verts))
    # No self-loops: zero the diagonal of the probability matrix in place.
    np.fill_diagonal(true_p_mat, 0)

    graph = sample_edges(true_p_mat, directed=True, loops=False)

    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(graph)

    ari = adjusted_rand_score(true_labels, estimator.vertex_assignments_)
    assert ari > 0.95
    assert_allclose(true_p_mat, estimator.p_mat_, atol=0.12)
def test_SBM_fit_supervised(self):
    """Fit with known labels and check the estimated block matrix."""
    np.random.seed(8888)

    true_block_p = np.array(
        [
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ]
    )
    block_sizes = np.array([500, 500, 250, 250])
    graph = sbm(block_sizes, true_block_p, directed=True, loops=False)
    true_labels = _n_to_labels(block_sizes)

    estimator = SBMEstimator(directed=True, loops=False)
    estimator.fit(graph, y=true_labels)

    assert_allclose(estimator.block_p_, true_block_p, atol=0.01)
def _fit_models_for_side(graph, labels, side, report_n_params=False):
    """Fit SBM, DCSBM and dDCSBM a priori on one graph and save each result.

    Parameters
    ----------
    graph
        Adjacency matrix to fit.
    labels
        Per-vertex block labels.
    side : str
        Used to build the saved object names ``"sbm_<side>_df"``,
        ``"dcsbm_<side>_df"`` and ``"ddcsbm_<side>_df"``.
    report_n_params : bool, optional
        If True, print the ``"n_params"`` column of the SBM result before it
        is saved.
    """
    # NOTE(review): every estimator is built with ``directed=True`` even when
    # the caller has symmetrized the graph -- confirm this is intended.

    # fit SBM
    sbm = SBMEstimator(directed=True, loops=False)
    sbm_df = fit_a_priori(sbm, graph, labels)
    if report_n_params:
        print(sbm_df["n_params"])
    save_obj(sbm_df, file_obs, f"sbm_{side}_df")

    # fit DCSBM
    dcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=False)
    dcsbm_df = fit_a_priori(dcsbm, graph, labels)
    save_obj(dcsbm_df, file_obs, f"dcsbm_{side}_df")

    # fit dDCSBM
    ddcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=True)
    ddcsbm_df = fit_a_priori(ddcsbm, graph, labels)
    save_obj(ddcsbm_df, file_obs, f"ddcsbm_{side}_df")


def run_fit(seed, directed):
    """Fit a priori SBM, DCSBM and dDCSBM models to the left and right graphs.

    Each graph is loaded, optionally symmetrized, and fit with the three
    models; every result frame is saved through ``save_obj``.

    Parameters
    ----------
    seed
        Not used inside this function; kept for interface compatibility.
    directed : bool
        If False, each graph is symmetrized with ``method="avg"`` before
        fitting.

    Returns
    -------
    int
        Always 0.
    """
    # run left
    graph, labels = load_left()
    print(labels)
    if not directed:
        graph = symmetrize(graph, method="avg")
    _fit_models_for_side(graph, labels, "left", report_n_params=True)

    # run right
    graph, labels = load_right()
    if not directed:
        graph = symmetrize(graph, method="avg")
    _fit_models_for_side(graph, labels, "right")

    return 0
def setup_class(cls):
    """Build the shared two-block fixture: estimator, graph, labels and P matrix."""
    block_sizes = [50, 50]
    block_p = np.array([[0.9, 0.1], [0.1, 0.9]])

    cls.estimator = SBMEstimator(directed=True, loops=False)
    cls.graph = sbm(block_sizes, block_p, directed=True)
    cls.labels = _n_to_labels([50, 50])

    expected_p = _block_to_full(block_p, cls.labels, (100, 100))
    # No self-loops: zero the diagonal of the expected probability matrix.
    np.fill_diagonal(expected_p, 0)
    cls.p_mat = expected_p
def get_sbm_prob(adj, labels):
    """Return SBM block probabilities as a DataFrame ordered by block size.

    Labels are integer-encoded before fitting so the estimator always sees
    codes ``0..k-1``. An ``SBMEstimator(directed=True, loops=True)`` is fit
    to the binarized adjacency matrix, and the rows and columns of its
    ``block_p_`` matrix are reordered by descending label count.

    Parameters
    ----------
    adj
        Adjacency matrix; binarized before fitting.
    labels
        Per-vertex block labels (1-D).

    Returns
    -------
    pandas.DataFrame
        Reordered block probability matrix, indexed by the original labels
        on both axes.
    """
    # ``return_inverse`` yields, for each vertex, the index of its label in
    # the sorted unique labels. This is the same encoding the previous
    # dict + itemgetter lookup produced, but it also handles a single vertex
    # (where ``itemgetter`` returns a scalar instead of a tuple).
    uni_labels, y, counts = np.unique(labels, return_inverse=True, return_counts=True)

    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=y)
    data = sbm.block_p_

    # Largest blocks first.
    sort_inds = np.argsort(counts)[::-1]
    uni_labels = uni_labels[sort_inds]
    data = data[np.ix_(sort_inds, sort_inds)]

    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)
    return prob_df
#%%
from graspy.models import SBMEstimator
from src.data import load_new_left
from graspy.plot import heatmap
import numpy as np

# Fit a standard (non co-block) SBM using the provided labels and plot its
# probability matrix.
adj, labels = load_new_left()
sbm = SBMEstimator(loops=False, co_block=False)
sbm.fit(adj, y=labels)
heatmap(sbm.p_mat_, inner_hier_labels=labels, vmin=0, vmax=1)

#%%
# Two label columns per vertex; in the second column the "O" and "I" classes
# are merged into a single "O/I" class.
co_labels = np.stack((labels, labels), axis=1).astype("U3")
second_column = co_labels[:, 1]  # view into co_labels, so assignment is in place
second_column[(second_column == "O") | (second_column == "I")] = "O/I"
co_labels

#%%
# Fit the co-block SBM on the two-column labels and plot its probability matrix.
cosbm = SBMEstimator(loops=False, co_block=True)
cosbm.fit(adj, y=co_labels)
heatmap(cosbm.p_mat_, inner_hier_labels=labels)

#%%
ax = axs[0, level] _, _, top, _ = adjplot( binarize(full_adj), sizes=(0.5, 0.5), ax=ax, plot_type="scattermap", sort_class=["hemisphere"] + level_names[:level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), ) sbm = SBMEstimator(directed=True, loops=True) labels, inv = np.unique(full_meta[label_name].values, return_inverse=True) sbm.fit(binarize(full_adj), inv) ax = axs[1, level] _, _, top, _ = adjplot( sbm.p_mat_, ax=ax, plot_type="heatmap", sort_class=["hemisphere"] + level_names[:level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"),
def select_sbm(
    graph,
    param_grid,
    directed=True,
    co_block=False,
    metric="mse",
    c=0,
    rank="full",
    n_jobs=1,
    n_init=1,
):
    """Grid-search SBM settings on ``graph`` and return the search results.

    A base ``SBMEstimator`` is built from the fixed arguments, scoring
    functions are generated for it with ``gen_scorers``, and ``GridSearchUS``
    sweeps ``param_grid`` without refitting a final model.

    Parameters
    ----------
    graph
        Adjacency matrix to fit.
    param_grid
        Parameter grid handed to ``GridSearchUS``.
    directed : bool, optional
        Forwarded to ``SBMEstimator``; by default True.
    co_block : bool, optional
        Forwarded to ``SBMEstimator``; by default False.
    metric : str, optional
        Forwarded to ``SBMEstimator``; by default "mse".
    c : optional
        Not used by the current implementation; by default 0.
    rank : optional
        Forwarded to ``SBMEstimator``; by default "full".
    n_jobs : int, optional
        Forwarded to ``GridSearchUS``; by default 1.
    n_init : int, optional
        Forwarded to ``GridSearchUS``; by default 1.

    Returns
    -------
    The ``cv_results_`` attribute of the fitted ``GridSearchUS``.
    """
    # Settings shared by every candidate estimator in the sweep.
    base_estimator = SBMEstimator(
        directed=directed,
        loops=False,
        co_block=co_block,
        metric=metric,
        rank=rank,
    )

    # Scoring functions used to evaluate each candidate.
    scoring = gen_scorers(base_estimator, graph)

    search = GridSearchUS(
        base_estimator,
        param_grid,
        scoring=scoring,
        n_jobs=n_jobs,
        verbose=0,
        refit=False,
        n_init=n_init,
    )
    search.fit(graph)

    return search.cv_results_
# GMM likelihood score = gmm.model_.score(latent) temp_dict = base_dict.copy() temp_dict["Metric"] = "GMM likelihood" temp_dict["Score"] = score out_dicts.append(temp_dict) # GMM BIC score = gmm.model_.bic(latent) temp_dict = base_dict.copy() temp_dict["Metric"] = "GMM BIC" temp_dict["Score"] = score out_dicts.append(temp_dict) # SBM likelihood sbm = SBMEstimator(directed=True, loops=False) sbm.fit(bin_adj, y=pred_labels) score = sbm.score(bin_adj) temp_dict = base_dict.copy() temp_dict["Metric"] = "SBM likelihood" temp_dict["Score"] = score out_dicts.append(temp_dict) # DCSBM likelihood dcsbm = DCSBMEstimator(directed=True, loops=False) dcsbm.fit(bin_adj, y=pred_labels) score = dcsbm.score(bin_adj) temp_dict = base_dict.copy() temp_dict["Metric"] = "DCSBM likelihood" temp_dict["Score"] = score out_dicts.append(temp_dict)
# Keep only the labels of the non-pendant nodes.
class_labels = class_labels[not_pendant_inds]
skeleton_labels = skeleton_labels[not_pendant_inds]

file_loc = "maggot_models/notebooks/outs/39.2-BDP-unbiased-clustering/objs/gmm-k18-AutoGMM-LSE-right-ad-PTR-raw.pickle"
# Use a context manager so the file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(file_loc, "rb") as f:
    gmm = pickle.load(f)

# # %% [markdown]
# #
#
# Mean signal flow of the nodes in each predicted cluster.
node_signal_flow = signal_flow(adj)
mean_sf = np.zeros(k)
for i in np.unique(pred_labels):
    inds = np.where(pred_labels == i)[0]
    mean_sf[i] = np.mean(node_signal_flow[inds])

# First coordinate of each GMM component mean.
cluster_mean_latent = gmm.model_.means_[:, 0]

# Block probability matrix of an SBM fit with the predicted labels, turned
# into a weighted directed graph over the k blocks.
block_probs = SBMEstimator().fit(bin_adj, y=pred_labels).block_p_
block_prob_df = pd.DataFrame(data=block_probs, index=range(k), columns=range(k))
block_g = nx.from_pandas_adjacency(block_prob_df, create_using=nx.DiGraph)
plt.figure(figsize=(10, 10))
# don't ever let em tell you you're too pythonic
# Node position: (first latent coordinate of cluster mean, mean signal flow).
pos = dict(zip(range(k), zip(cluster_mean_latent, mean_sf)))
# nx.draw_networkx_nodes(block_g, pos=pos)
labels = nx.get_edge_attributes(block_g, "weight")
# nx.draw_networkx_edge_labels(block_g, pos, edge_labels=labels)
from matplotlib.cm import ScalarMappable
import matplotlib as mpl

norm = mpl.colors.LogNorm(vmin=0.01, vmax=0.1)
#%%
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from graspy.models import DCSBMEstimator, RDPGEstimator, SBMEstimator
from graspy.plot import heatmap
from src.data import load_right

# Load data
right_adj, right_labels = load_right()

# Fit the models
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(right_adj, y=right_labels)

dcsbm = DCSBMEstimator(degree_directed=False, directed=True, loops=False)
dcsbm.fit(right_adj, y=right_labels)

rdpg = RDPGEstimator(loops=False, n_components=3)
rdpg.fit(right_adj)

# Plotting
np.random.seed(8888)

# ``matplotlib.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed in
# 3.9; ``pyplot.get_cmap`` is available on both old and new releases.
cmap = plt.get_cmap("RdBu_r")
center = 0
vmin = 0
vmax = 1
norm = mpl.colors.Normalize(0, 1)
hue="n_block_try", palette=cmap, **plt_kws, ) plt.xlabel("# Params (SBM params for SBMs)") plt.ylabel("MSE") plt.title(f"Drosophila old MB left, directed ({experiment}:{run})") plt.savefig(save_dir / "rank_sbm_Klines.pdf", format="pdf", facecolor="w") #%% from graspy.models import SBMEstimator from graspy.datasets import load_drosophila_left, load_drosophila_right from graspy.utils import binarize sbm = SBMEstimator(directed=True, loops=False) left_adj, left_labels = load_drosophila_left(return_labels=True) left_adj = binarize(left_adj) sbm.fit(left_adj, y=left_labels) sbm.mse(left_adj) sbm._n_parameters() right_adj, right_labels = load_drosophila_right(return_labels=True) er = SBMEstimator(directed=True, loops=False, n_blocks=2) er.fit(left_adj) er.mse(left_adj) heatmap(left_adj, inner_hier_labels=er.vertex_assignments_, outer_hier_labels=left_labels) #%%
sharex=True, ) fg = fg.map(sns.distplot, "Signal flow") fg.set(yticks=()) stashfig("sf-dists-separate") fg = sns.FacetGrid(total_df, col="Input", aspect=2, hue="Block", margin_titles=True, sharex=True) fg = fg.map(sns.distplot, "Signal flow") fg.add_legend() fg.set(yticks=()) stashfig("sf-dists-squished") # %% from graspy.models import SBMEstimator sbm = SBMEstimator() sbm.fit(A) B_hat = sbm.block_p_ sns.heatmap(B_hat) z = signal_flow(B_hat) sort_inds = np.argsort(z)[::-1] plt.figure() sns.heatmap(B_hat[np.ix_(sort_inds, sort_inds)])
# %% [markdown] # ## from graspy.models import SBMEstimator level = 2 n_row = 3 n_col = 7 scale = 10 fig, axs = plt.subplots(n_row, n_col, figsize=(n_row * scale, n_col * scale)) for level in range(8): label_name = f"lvl{level}_labels_side" sbm = SBMEstimator(directed=True, loops=True) sbm.fit(binarize(full_adj), full_meta[label_name].values) ax = axs[1, level] _, _, top, _ = adjplot( sbm.p_mat_, ax=ax, plot_type="heatmap", sort_class=["hemisphere"] + level_names[: level + 1], item_order=["merge_class_sf_order", "merge_class", "sf"], class_order="sf", meta=full_mg.meta, palette=CLASS_COLOR_DICT, colors="merge_class", ticks=False, gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"), cbar_kws=dict(shrink=0.6),
plt.style.use("seaborn-white") right_graph, right_labels = load_right() np.random.seed(8888) n_init = 200 clip = 1 / (right_graph.size - right_graph.shape[0]) heatmap_kws = dict(vmin=0, vmax=1, font_scale=1.5, hier_label_fontsize=20, cbar=False) fig, ax = plt.subplots(4, 2, figsize=(15, 30)) # A priori SBM ap_estimator = SBMEstimator() ap_estimator.fit(right_graph, y=right_labels) lik = ap_estimator.score(right_graph, clip=clip) heatmap( right_graph, inner_hier_labels=right_labels, title="Right MB (by cell type)", ax=ax[0, 0], **heatmap_kws, ) heatmap( ap_estimator.p_mat_, inner_hier_labels=right_labels, title=f"A priori SBM, lik = {lik:.2f}",
def test_SBM_inputs(self):
    """Check that invalid constructor arguments and fit inputs are rejected."""
    # (expected exception, constructor keyword arguments)
    invalid_init_cases = [
        (TypeError, {"directed": "hey"}),
        (TypeError, {"loops": 6}),
        (TypeError, {"n_components": "XD"}),
        (ValueError, {"n_components": -1}),
        (TypeError, {"min_comm": "1"}),
        (ValueError, {"min_comm": -1}),
        (TypeError, {"max_comm": "ay"}),
        (ValueError, {"max_comm": -1}),
        (ValueError, {"min_comm": 4, "max_comm": 2}),
    ]
    for expected_error, init_kwargs in invalid_init_cases:
        with pytest.raises(expected_error):
            SBMEstimator(**init_kwargs)

    graph = er_np(100, 0.5)
    wrong_length_labels = np.zeros(99)
    estimator = SBMEstimator()

    # Label vector shorter than the number of vertices.
    with pytest.raises(ValueError):
        estimator.fit(graph, y=wrong_length_labels)

    # Non-square adjacency matrix.
    with pytest.raises(ValueError):
        estimator.fit(graph[:, :99])

    # Adjacency array with an extra trailing dimension.
    with pytest.raises(ValueError):
        estimator.fit(graph[..., np.newaxis])

    # Keyword-argument containers must not be plain integers.
    for kws_name in ("cluster_kws", "embed_kws"):
        with pytest.raises(TypeError):
            SBMEstimator(**{kws_name: 1})
adj, cbar=False, title="Adjacency matrix", inner_hier_labels=labels, sort_nodes=True, hier_label_fontsize=16, ) mean_degree = np.mean(np.sum(adj, axis=0)) print(f"Mean degree: {mean_degree:.3f}") # %% [markdown] # ## Double checking the model parameters # Below is a quick sanity check that the graph we sampled has block probabilities that are # close to what we set originally if we undo the rescaling step. # %% double checking on model params sbme = SBMEstimator(directed=False, loops=False) sbme.fit(adj, y=labels) block_p_hat = sbme.block_p_ block_heatmap(block_p_hat, title=r"Observed $\hat{B}$") block_p_hat_unscaled = block_p_hat * 1 / scaling_factor block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)") # %% [markdown] # ## Spectral embedding # Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the # SVDs here. There is an option on whether to throw out the first eigenvector. #%% embeddings embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False) ase = AdjacencySpectralEmbed(**embed_kws) lse = LaplacianSpectralEmbed(form="DAD", **embed_kws) rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)
vmax=1, font_scale=1.5, title="DCER probability matrix", sort_nodes=True) plt.savefig("DCERProbabilityMatrix", bbox_inches='tight') heatmap(dcer.sample()[0], inner_hier_labels=labels, font_scale=1.5, title="DCER sample", sort_nodes=True) plt.savefig("DCERSample", bbox_inches='tight') sbme = SBMEstimator(directed=True,loops=False) sbme.fit(adj, y=labels) print("SBM \"B\" matrix:") print(sbme.block_p_) heatmap(sbme.p_mat_, inner_hier_labels=labels, vmin=0, vmax=1, font_scale=1.5, title="SBM probability matrix", sort_nodes=True) plt.savefig("SBMProbabilityMatrix", bbox_inches='tight') heatmap(sbme.sample()[0], inner_hier_labels=labels,