def test_DCER_fit(self):
    """Fit a DCSBM to the fixture graph without labels and check ``p_mat_``.

    The fitted probability matrix must match the fixture's ``p_mat`` to
    within an absolute tolerance of 0.12.
    """
    # Seed NumPy's global RNG before constructing and fitting the estimator.
    np.random.seed(8888)
    expected_p = self.p_mat
    estimator = DCSBMEstimator(directed=True, loops=False)
    estimator.fit(self.graph)
    assert_allclose(expected_p, estimator.p_mat_, atol=0.12)
def test_DCSBM_fit_supervised(self):
    """Fit a DCSBM with the fixture labels passed as ``y`` and check ``p_mat_``.

    The fitted probability matrix must match the fixture's ``p_mat`` to
    within an absolute tolerance of 0.1.
    """
    expected_p = self.p_mat
    estimator = DCSBMEstimator(directed=True, loops=False)
    estimator.fit(self.g, y=self.labels)
    assert_allclose(estimator.p_mat_, expected_p, atol=0.1)
def test_DCSBM_score(self):
    """Run the shared score check, then verify a sliced graph is rejected."""
    expected_p = self.p_mat
    adjacency = self.g
    estimator = DCSBMEstimator()
    _test_score(estimator, expected_p, adjacency)

    # score_samples must raise ValueError when handed the [1:100, 1:100]
    # sub-block of the graph.
    with pytest.raises(ValueError):
        estimator.score_samples(graph=adjacency[1:100, 1:100])
def test_DCSBM_fit_unsupervised(self):
    """Recover block labels and ``p_mat_`` from a sampled 3-block graph.

    A 1500-vertex probability matrix is built from a 3x3 block matrix scaled
    by per-vertex Beta(4, 1) draws, with the diagonal zeroed. A graph is
    sampled from it and a DCSBM is fit without labels. The fit must reach an
    adjusted Rand score above 0.95 against the true labels, and its
    ``p_mat_`` must match the generating matrix within ``atol=0.12``.
    """
    np.random.seed(12345)
    n_verts = 1500
    # Per-vertex scaling factors; drawn first so the RNG stream is unchanged.
    degree_scale = np.random.beta(4, 1, n_verts)
    block_probs = np.array(
        [
            [0.7, 0.1, 0.1],
            [0.1, 0.9, 0.1],
            [0.05, 0.1, 0.75],
        ]
    )
    block_sizes = np.array([500, 500, 500])
    true_labels = _n_to_labels(block_sizes)

    expected_p = _block_to_full(block_probs, true_labels, (n_verts, n_verts))
    expected_p = expected_p * np.outer(degree_scale, degree_scale)
    # Zero the diagonal by subtracting it from itself.
    expected_p -= np.diag(np.diag(expected_p))

    sampled = sample_edges(expected_p, directed=True, loops=False)
    estimator = DCSBMEstimator(directed=True, loops=False)
    estimator.fit(sampled)

    assert adjusted_rand_score(true_labels, estimator.vertex_assignments_) > 0.95
    assert_allclose(expected_p, estimator.p_mat_, atol=0.12)
def run_fit(seed, directed):
    """Fit a priori SBM, DCSBM and dDCSBM models to the left and right graphs.

    For each side the graph and labels are loaded, the graph is optionally
    symmetrized, and three estimators are fit through ``fit_a_priori``. Each
    result is stored with ``save_obj`` under ``file_obs``.

    Parameters
    ----------
    seed
        Not read anywhere in this function.
    directed
        When falsy, each graph is replaced by
        ``symmetrize(graph, method="avg")`` before fitting. The estimators
        themselves are always constructed with ``directed=True``.

    Returns
    -------
    int
        Always ``0``.
    """

    def fit_and_save(adjacency, node_labels, save_names, show_n_params):
        # Fit the three models in order, saving each result as it is produced.
        sbm_name, dcsbm_name, ddcsbm_name = save_names

        # fit SBM
        sbm_df = fit_a_priori(
            SBMEstimator(directed=True, loops=False), adjacency, node_labels
        )
        if show_n_params:
            print(sbm_df["n_params"])
        save_obj(sbm_df, file_obs, sbm_name)

        # fit DCSBM
        dcsbm_df = fit_a_priori(
            DCSBMEstimator(directed=True, loops=False, degree_directed=False),
            adjacency,
            node_labels,
        )
        save_obj(dcsbm_df, file_obs, dcsbm_name)

        # fit dDCSBM
        ddcsbm_df = fit_a_priori(
            DCSBMEstimator(directed=True, loops=False, degree_directed=True),
            adjacency,
            node_labels,
        )
        save_obj(ddcsbm_df, file_obs, ddcsbm_name)

    # run left (the labels and the SBM parameter counts are printed for this
    # side only)
    left_graph, left_labels = load_left()
    print(left_labels)
    if not directed:
        left_graph = symmetrize(left_graph, method="avg")
    fit_and_save(
        left_graph,
        left_labels,
        ("sbm_left_df", "dcsbm_left_df", "ddcsbm_left_df"),
        show_n_params=True,
    )

    # run right
    right_graph, right_labels = load_right()
    if not directed:
        right_graph = symmetrize(right_graph, method="avg")
    fit_and_save(
        right_graph,
        right_labels,
        ("sbm_right_df", "dcsbm_right_df", "ddcsbm_right_df"),
        show_n_params=False,
    )

    return 0
def select_dcsbm(
    graph,
    param_grid,
    directed=True,
    degree_directed=False,
    metric="mse",
    c=0,
    rank="full",
    n_jobs=1,
    n_init=1,
):
    """Grid-search DCSBM hyperparameters on ``graph`` and return the results.

    Parameters
    ----------
    graph
        Passed to ``gen_scorers`` and to ``GridSearchUS.fit``.
    param_grid
        Parameter grid handed to ``GridSearchUS``.
    directed, degree_directed, metric
        Forwarded to the ``DCSBMEstimator`` constructor (with ``loops=False``).
    c, rank
        Accepted but not read anywhere in this function.
    n_jobs, n_init
        Forwarded to ``GridSearchUS``.

    Returns
    -------
    The search's ``cv_results_``. When it has a ``"param_embed_kws"`` column,
    a ``"param_regularizer"`` column is added, holding the ``"regularizer"``
    entry of each row's embed keywords.
    """
    # Settings shared by every candidate in the grid.
    base_estimator = DCSBMEstimator(
        directed=directed,
        degree_directed=degree_directed,
        loops=False,
        metric=metric,
    )

    # Scoring functions used to evaluate each candidate.
    scoring = gen_scorers(base_estimator, graph)

    search = GridSearchUS(
        base_estimator,
        param_grid,
        scoring=scoring,
        n_jobs=n_jobs,
        verbose=0,
        refit=False,
        n_init=n_init,
    )
    search.fit(graph)

    results = search.cv_results_
    if "param_embed_kws" not in results.columns:
        return results

    # Surface the regularizer as its own column.
    embed_kws_per_row = results["param_embed_kws"].values
    results["param_regularizer"] = [kws["regularizer"] for kws in embed_kws_per_row]
    return results
def test_DCSBM_nparams(self):
    """Check ``_n_parameters`` for four DCSBM configurations.

    The expected counts are written in terms of the hard-coded
    ``n_verts = 3000`` and ``n_class = 4``.
    """
    n_verts, n_class = 3000, 4
    adjacency = self.g
    true_labels = self.labels

    # Directed, fit without labels: expected count includes n_class - 1.
    unlabeled = DCSBMEstimator(directed=True)
    unlabeled.fit(adjacency)
    assert unlabeled._n_parameters() == (n_verts + n_class - 1 + n_class**2)

    # Directed, labels supplied.
    labeled = DCSBMEstimator(directed=True)
    labeled.fit(adjacency, y=true_labels)
    assert labeled._n_parameters() == (n_verts + n_class**2)

    # Directed with degree_directed=True: the per-vertex term doubles.
    degree_directed = DCSBMEstimator(directed=True, degree_directed=True)
    degree_directed.fit(adjacency, y=true_labels)
    assert degree_directed._n_parameters() == (2 * n_verts + n_class**2)

    # Undirected. NOTE(review): 10 is presumably the number of unique blocks
    # of a symmetric 4x4 block matrix (4 * 5 / 2) -- confirm.
    undirected = DCSBMEstimator(directed=False)
    undirected.fit(adjacency, y=true_labels)
    assert undirected._n_parameters() == (n_verts + 10)
def test_DCSBM_sample(self):
    """Exercise ``DCSBMEstimator.sample`` error handling and sampling.

    Checks that sampling before fitting raises ``NotFittedError``, that bad
    ``n_samples`` values raise, and then runs the shared ``_test_sample``
    helper with ``p_mat_`` overwritten by the generating matrix.
    """
    np.random.seed(8888)
    estimator = DCSBMEstimator(directed=True, loops=False)

    # Two blocks of 50 vertices, scaled by per-vertex uniform draws.
    block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])
    degree_scale = np.random.uniform(0.25, 0.75, size=100)
    block_labels = _n_to_labels([50, 50])
    true_p = _block_to_full(block_probs, block_labels, (100, 100))
    true_p = true_p * np.outer(degree_scale, degree_scale)
    # Zero the diagonal by subtracting it from itself.
    true_p -= np.diag(np.diag(true_p))
    sampled = sample_edges(true_p, directed=True)

    with pytest.raises(NotFittedError):
        estimator.sample()

    estimator.fit(sampled, y=block_labels)

    for bad_n_samples, expected_error in ((-1, ValueError), ("nope", TypeError)):
        with pytest.raises(expected_error):
            estimator.sample(n_samples=bad_n_samples)

    # Replace the fitted matrix with the true one before checking samples.
    estimator.p_mat_ = true_p
    _test_sample(estimator, true_p, n_samples=1000, atol=0.1)
def test_DCSBM_inputs(self):
    """Check that invalid constructor and ``fit`` arguments raise."""
    # (expected exception, constructor keywords), checked in this order.
    bad_constructor_args = (
        (TypeError, {"directed": "hey"}),
        (TypeError, {"loops": 6}),
        (TypeError, {"n_components": "XD"}),
        (ValueError, {"n_components": -1}),
        (TypeError, {"min_comm": "1"}),
        (ValueError, {"min_comm": -1}),
        (TypeError, {"max_comm": "ay"}),
        (ValueError, {"max_comm": -1}),
        (ValueError, {"min_comm": 4, "max_comm": 2}),
    )
    for expected_error, kwargs in bad_constructor_args:
        with pytest.raises(expected_error):
            DCSBMEstimator(**kwargs)

    graph = er_np(100, 0.5)
    bad_y = np.zeros(99)
    dcsbe = DCSBMEstimator()

    # Labels with 99 entries for a graph built with er_np(100, 0.5).
    with pytest.raises(ValueError):
        dcsbe.fit(graph, y=bad_y)

    # Graph with its last column dropped.
    with pytest.raises(ValueError):
        dcsbe.fit(graph[:, :99])

    # Graph with an extra trailing axis.
    with pytest.raises(ValueError):
        dcsbe.fit(graph[..., np.newaxis])

    for keyword in ("cluster_kws", "embed_kws"):
        with pytest.raises(TypeError):
            DCSBMEstimator(**{keyword: 1})
vmax=1, font_scale=1.5, title="SBM probability matrix", sort_nodes=True) plt.savefig("SBMProbabilityMatrix", bbox_inches='tight') heatmap(sbme.sample()[0], inner_hier_labels=labels, font_scale=1.5, title="SBM sample", sort_nodes=True) plt.savefig("SBMSample", bbox_inches='tight') dcsbme = DCSBMEstimator(directed=True,loops=False) dcsbme.fit(adj, y=labels) print("DCSBM \"B\" matrix:") print(dcsbme.block_p_) heatmap(dcsbme.p_mat_, inner_hier_labels=labels, font_scale=1.5, title="DCSBM probability matrix", vmin=0, vmax=1, sort_nodes=True) plt.savefig("DCSBMProbabilityMatrix", bbox_inches='tight') heatmap(dcsbme.sample()[0], inner_hier_labels=labels,
right_graph, inner_hier_labels=pred_labels, title="Right MB (by SBM block)", ax=ax[1, 0], **heatmap_kws, ) heatmap( gs.model_.p_mat_, inner_hier_labels=pred_labels, title=f"Fit SBM, lik = {lik:.2f}", ax=ax[1, 1], **heatmap_kws, ) # A priori DCSBM ap_estimator = DCSBMEstimator() ap_estimator.fit(right_graph, y=right_labels) lik = ap_estimator.score(right_graph, clip=clip) heatmap( right_graph, inner_hier_labels=right_labels, title="Right MB (by cell type)", ax=ax[2, 0], **heatmap_kws, ) heatmap( ap_estimator.p_mat_, inner_hier_labels=right_labels, title=f"A priori DCSBM, lik = {lik:.2f}", ax=ax[2, 1], **heatmap_kws,
import matplotlib as mpl
import numpy as np
from graspy.models import DCSBMEstimator, RDPGEstimator, SBMEstimator
from graspy.plot import heatmap
from src.data import load_right

# Script: fit an SBM, a DCSBM and an RDPG to the graph returned by
# load_right(), then set up the colormap values used for plotting.

# Load data
right_adj, right_labels = load_right()

# Fit the models
# SBM and DCSBM are fit with the loaded labels passed as y.
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(right_adj, y=right_labels)
dcsbm = DCSBMEstimator(degree_directed=False, directed=True, loops=False)
dcsbm.fit(right_adj, y=right_labels)
# The RDPG is fit on the adjacency alone, with 3 components.
rdpg = RDPGEstimator(loops=False, n_components=3)
rdpg.fit(right_adj)

# Plotting
# Seed NumPy's global RNG (after the fits above, so it does not affect them).
np.random.seed(8888)
cmap = mpl.cm.get_cmap("RdBu_r")
center = 0
vmin = 0
vmax = 1
norm = mpl.colors.Normalize(0, 1)
# Re-sample only the upper half (0.5 to 1) of RdBu_r at 256 points and
# replace cmap with the resulting ListedColormap.
cc = np.linspace(0.5, 1, 256)
cmap = mpl.colors.ListedColormap(cmap(cc))
#%% from src.data import load_left from graspy.models import DCSBMEstimator graph, labels = load_left() dcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=True) dcsbm.fit(graph, y=labels) dcsbm.mse(graph) #%% from src.models import GridSearchUS from src.models import select_rdpg n_init = 3 n_components_try = range(1, 5) param_grid = dict(n_components=n_components_try) select_rdpg(graph, param_grid) #%% from graspy.utils import cartprod import numpy as np s = range(20, 25) f = np.random.uniform(size=5) out = cartprod(s, f) from itertools import product out = product(s, f) for i, j in product(s, f): print(i)
from graspy.utils import remove_loops X = ase_flat_embed[:, :d] n_pairs = len(X) // 2 new_lp_inds = np.arange(n_pairs) new_rp_inds = np.arange(n_pairs).copy() + n_pairs rows = [] for l in range(n_levels): labels = new_meta[f"lvl{l}_labels"].values left_adj = binarize(new_adj[np.ix_(new_lp_inds, new_lp_inds)]) left_adj = remove_loops(left_adj) right_adj = binarize(new_adj[np.ix_(new_rp_inds, new_rp_inds)]) right_adj = remove_loops(right_adj) dcsbm = DCSBMEstimator(directed=True, loops=False) uni_labels, inv = np.unique(labels, return_inverse=True) dcsbm.fit(left_adj, inv[new_lp_inds]) train_left_p = dcsbm.p_mat_ train_left_p[train_left_p == 0] = 1 / train_left_p.size score = poisson.logpmf(left_adj, train_left_p).sum() rows.append( dict(train_side="left", test="same", test_side="left", score=score, level=l)) score = poisson.logpmf(right_adj, train_left_p).sum() rows.append( dict(train_side="left",
# %% [markdown] # ## # %% [markdown] # ## # %% [markdown] # ## pairplot(embed, labels=pred_labels, palette=cc.glasbey_light) # %% [markdown] # ## sbm = DCSBMEstimator(directed=True, degree_directed=True, loops=False, max_comm=30) sbm.fit(binarize(adj)) pred_labels = sbm.vertex_assignments_ print(len(np.unique(pred_labels))) meta["pred_labels"] = pred_labels graph = np.squeeze(sbm.sample()) meta["adj_sf"] = -signal_flow(binarize(adj)) block_sf = -signal_flow(sbm.block_p_) block_map = pd.Series(data=block_sf) meta["block_sf"] = meta["pred_labels"].map(block_map)
# Each metric below is recorded the same way: copy base_dict, fill in
# "Metric" and "Score", and append the copy to out_dicts.
# NOTE(review): the score stored for "GMM BIC" is computed before this
# excerpt -- confirm against the preceding code.
temp_dict = base_dict.copy()
temp_dict["Metric"] = "GMM BIC"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# SBM likelihood
# Fit with the predicted labels as y, then score on the same binarized graph.
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(bin_adj, y=pred_labels)
score = sbm.score(bin_adj)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "SBM likelihood"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# DCSBM likelihood
# Same procedure as the SBM above, using DCSBMEstimator.
dcsbm = DCSBMEstimator(directed=True, loops=False)
dcsbm.fit(bin_adj, y=pred_labels)
score = dcsbm.score(bin_adj)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "DCSBM likelihood"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# ARI of the subset with labels
score = sub_ari(known_inds, class_labels, pred_labels)
temp_dict = base_dict.copy()
temp_dict["Metric"] = "Simple ARI"
temp_dict["Score"] = score
out_dicts.append(temp_dict)

# ARI vs K - 1
ax = axs[1] adjplot( adj, meta=meta, sort_class=["hemisphere", "lvl0_labels"], colors="merge_class", palette=CLASS_COLOR_DICT, class_order=["signal_flow"], item_order=["te"], plot_type="scattermap", sizes=(0.5, 0.5), ax=ax, ticks=False, ) estimator = DCSBMEstimator(degree_directed=True, directed=True, loops=False) estimator.fit(adj, meta["lvl0_labels"].values) sample = np.squeeze(estimator.sample()) ax = axs[0] adjplot( sample, meta=meta, sort_class=["hemisphere", "lvl0_labels"], colors="merge_class", palette=CLASS_COLOR_DICT, class_order=["signal_flow"], item_order=["te"], plot_type="scattermap", sizes=(0.5, 0.5), ax=ax, ticks=False,
def test_DCSBM_score(self):
    """Run the shared score check on a default-constructed DCSBM estimator."""
    expected_p = self.p_mat
    adjacency = self.g
    _test_score(DCSBMEstimator(), expected_p, adjacency)