def run_fit(seed):
    np.random.seed(seed)

    # load
    left_graph, left_labels = load_left()
    right_graph, right_labels = load_right()

    # fit SBM left, predict right
    sbm_fit_left = SBMEstimator(directed=True, loops=False)
    sbm_fit_left.fit(left_graph, y=left_labels)
    right_pred_mse = mse_on_other(sbm_fit_left, right_graph, right_labels)
    right_pred_likelihood = likelihood_on_other(sbm_fit_left, right_graph,
                                                right_labels)
    right_pred_sc_likelihood = likelihood_on_other(
        sbm_fit_left,
        right_graph,
        right_labels,
        clip=1 / (right_graph.size - right_graph.shape[0]),
    )
    right_pred_dict = {
        "n_params": sbm_fit_left._n_parameters(),
        "mse": right_pred_mse,
        "likelihood": right_pred_likelihood,
        "zc_likelihood": right_pred_likelihood,
        "sc_likelihood": right_pred_sc_likelihood,
    }
    right_pred_df = pd.DataFrame(right_pred_dict, index=[0])
    print(right_pred_df)
    save_obj(right_pred_df, file_obs, "right_pred_sbm_df")

    # fit SBM right, predict left
    sbm_fit_right = SBMEstimator(directed=True, loops=False)
    sbm_fit_right.fit(right_graph, y=right_labels)
    left_pred_mse = mse_on_other(sbm_fit_right, left_graph, left_labels)
    left_pred_likelihood = likelihood_on_other(sbm_fit_right, left_graph,
                                               left_labels)
    left_pred_sc_likelihood = likelihood_on_other(
        sbm_fit_right,
        left_graph,
        left_labels,
        clip=1 / (left_graph.size - left_graph.shape[0]),
    )
    left_pred_dict = {
        "n_params": sbm_fit_right._n_parameters(),
        "mse": left_pred_mse,
        "likelihood": left_pred_likelihood,
        "zc_likelihood": left_pred_likelihood,
        "sc_likelihood": left_pred_sc_likelihood,
    }
    left_pred_df = pd.DataFrame(left_pred_dict, index=[0])
    print(left_pred_df)
    save_obj(left_pred_df, file_obs, "left_pred_sbm_df")
    # sbm_fit_right = SBMEstimator(directed=True, loops=False)
    # sbm_fit_right.fit(right_graph, y=right_labels)
    # right_b = sbm_fit_right.block_p_

    # # save_obj(sbm_left_df, file_obs, "sbm_left_df")

    return 0
Example #2
    def test_SBM_nparams(self):
        e = self.estimator.fit(self.graph, y=self.labels)
        assert e._n_parameters() == (4)
        e = SBMEstimator()
        e.fit(self.graph)
        assert e._n_parameters() == (4 + 1)
        e = SBMEstimator(directed=False)
        e.fit(self.graph)
        assert e._n_parameters() == (1 + 3)
Example #3
def compute_mse_from_assignments(assignments,
                                 graph,
                                 directed=True,
                                 loops=False):
    estimator = SBMEstimator(loops=loops, directed=directed)
    estimator.fit(graph, y=assignments)
    return compute_mse(estimator, graph)
Example #4
def dcsbm_objective(adj, labels):
    # note: despite the name, this fits a (non-degree-corrected) a priori SBM to the
    # given partition and uses its log-likelihood on the same graph as the objective
    estimator = SBMEstimator()
    estimator.fit(adj, y=labels)
    objective = estimator.score(adj)
    return objective
Example #5
def probplot(
    adj,
    labels,
    log_scale=False,
    figsize=(20, 20),
    cmap="Purples",
    title="Edge probability",
    vmin=0,
    vmax=None,
    ax=None,
    font_scale=1,
):
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=labels)
    data = sbm.block_p_
    uni_labels = np.unique(labels)

    cbar_kws = {"fraction": 0.08, "shrink": 0.8, "pad": 0.03}

    if log_scale:
        # small offset to avoid taking log of zero-probability blocks
        data = data + 0.001
        vmin = data.min()
        vmax = data.max()
        log_norm = LogNorm(vmin=vmin, vmax=vmax)
        cbar_ticks = [
            math.pow(10, i)
            for i in range(math.floor(math.log10(vmin)), 1 + math.ceil(math.log10(vmax)))
        ]
        cbar_kws["ticks"] = cbar_ticks

    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)

    if ax is None:
        plt.figure(figsize=figsize)
        ax = plt.gca()

    ax.set_title(title, pad=30, fontsize=30)

    sns.set_context("talk", font_scale=font_scale)

    heatmap_kws = dict(
        cbar_kws=cbar_kws, annot=True, square=True, cmap=cmap, vmin=vmin, vmax=vmax
    )
    if log_scale:
        heatmap_kws["norm"] = log_norm
    heatmap_kws["ax"] = ax
    ax.tick_params(axis="both", which="major", labelsize=30)
    ax = sns.heatmap(prob_df, **heatmap_kws)

    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)

    return ax, prob_df
Example #6
def run_fit(seed, directed):
    np.random.seed(seed)

    # run left
    graph, labels = load_left()
    print(labels)
    if not directed:
        graph = symmetrize(graph, method="avg")

    # fit SBM
    sbm = SBMEstimator(directed=True, loops=False)
    sbm_left_df = fit_a_priori(sbm, graph, labels)
    print(sbm_left_df["n_params"])
    save_obj(sbm_left_df, file_obs, "sbm_left_df")

    # fit DCSBM
    dcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=False)
    dcsbm_left_df = fit_a_priori(dcsbm, graph, labels)
    save_obj(dcsbm_left_df, file_obs, "dcsbm_left_df")

    # fit dDCSBM
    ddcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=True)
    ddcsbm_left_df = fit_a_priori(ddcsbm, graph, labels)
    save_obj(ddcsbm_left_df, file_obs, "ddcsbm_left_df")

    # run right
    graph, labels = load_right()
    if not directed:
        graph = symmetrize(graph, method="avg")

    # fit SBM
    sbm = SBMEstimator(directed=True, loops=False)
    sbm_right_df = fit_a_priori(sbm, graph, labels)
    save_obj(sbm_right_df, file_obs, "sbm_right_df")

    # fit DCSBM
    dcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=False)
    dcsbm_right_df = fit_a_priori(dcsbm, graph, labels)
    save_obj(dcsbm_right_df, file_obs, "dcsbm_right_df")

    # fit dDCSBM
    ddcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=True)
    ddcsbm_right_df = fit_a_priori(ddcsbm, graph, labels)
    save_obj(ddcsbm_right_df, file_obs, "ddcsbm_right_df")

    return 0
Example #7
    def test_SBM_score(self):
        # tests score() and score_samples()
        B = np.array([[0.75, 0.25], [0.25, 0.75]])
        n_verts = 100
        n = np.array([n_verts, n_verts])
        tau = _n_to_labels(n)
        p_mat = _block_to_full(B, tau, shape=(n_verts * 2, n_verts * 2))
        graph = sample_edges(p_mat, directed=True)
        estimator = SBMEstimator(max_comm=4)
        _test_score(estimator, p_mat, graph)
Example #8
    def setup_class(cls):
        estimator = SBMEstimator(directed=True, loops=False)
        B = np.array([[0.9, 0.1], [0.1, 0.9]])
        g = sbm([50, 50], B, directed=True)
        labels = _n_to_labels([50, 50])
        p_mat = _block_to_full(B, labels, (100, 100))
        p_mat -= np.diag(np.diag(p_mat))
        cls.estimator = estimator
        cls.p_mat = p_mat
        cls.graph = g
        cls.labels = labels
Example #9
def get_sbm_prob(adj, labels):
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=labels)
    data = sbm.block_p_
    uni_labels, counts = np.unique(labels, return_counts=True)
    sort_inds = np.argsort(counts)[::-1]
    uni_labels = uni_labels[sort_inds]
    data = data[np.ix_(sort_inds, sort_inds)]

    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)

    return prob_df
Example #10
def get_sbm_prob(adj, labels):
    uni_labels, counts = np.unique(labels, return_counts=True)
    label_map = dict(zip(uni_labels, range(len(uni_labels))))
    y = np.array(itemgetter(*labels)(label_map))
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(adj), y=y)
    data = sbm.block_p_
    sort_inds = np.argsort(counts)[::-1]
    uni_labels = uni_labels[sort_inds]
    data = data[np.ix_(sort_inds, sort_inds)]

    prob_df = pd.DataFrame(columns=uni_labels, index=uni_labels, data=data)

    return prob_df
Example #11
    def test_SBM_fit_unsupervised(self):
        np.random.seed(12345)
        n_verts = 1500

        B = np.array([[0.7, 0.1, 0.1], [0.1, 0.9, 0.1], [0.05, 0.1, 0.75]])
        n = np.array([500, 500, 500])
        labels = _n_to_labels(n)
        p_mat = _block_to_full(B, labels, (n_verts, n_verts))
        p_mat -= np.diag(np.diag(p_mat))
        graph = sample_edges(p_mat, directed=True, loops=False)
        sbe = SBMEstimator(directed=True, loops=False)
        sbe.fit(graph)
        assert adjusted_rand_score(labels, sbe.vertex_assignments_) > 0.95
        assert_allclose(p_mat, sbe.p_mat_, atol=0.12)
Example #12
    def test_SBM_fit_supervised(self):
        np.random.seed(8888)
        B = np.array([
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ])
        n = np.array([500, 500, 250, 250])
        g = sbm(n, B, directed=True, loops=False)
        sbe = SBMEstimator(directed=True, loops=False)
        labels = _n_to_labels(n)
        sbe.fit(g, y=labels)
        B_hat = sbe.block_p_
        assert_allclose(B_hat, B, atol=0.01)
Example #13
    adj,
    cbar=False,
    title="Adjacency matrix",
    inner_hier_labels=labels,
    sort_nodes=True,
    hier_label_fontsize=16,
)
mean_degree = np.mean(np.sum(adj, axis=0))
print(f"Mean degree: {mean_degree:.3f}")

# %% [markdown]
# ## Double checking the model parameters
# Below is a quick sanity check that the sampled graph has block probabilities close to
# what we originally set, once the rescaling step is undone.
# %% double checking on model params
sbme = SBMEstimator(directed=False, loops=False)
sbme.fit(adj, y=labels)
block_p_hat = sbme.block_p_
block_heatmap(block_p_hat, title=r"Observed $\hat{B}$")
block_p_hat_unscaled = block_p_hat * 1 / scaling_factor
block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)")

# %% [markdown]
# ## Spectral embedding
# Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using
# SVDs here. There is an option for whether to throw out the first eigenvector (a brief
# sketch of that follows the embedding setup below).
#%% embeddings
embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False)
ase = AdjacencySpectralEmbed(**embed_kws)
lse = LaplacianSpectralEmbed(form="DAD", **embed_kws)
rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)
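# %% [markdown]
# As a rough illustration of the "throw out the first eigenvector" option mentioned
# above, here is a minimal sketch (added for illustration, not part of the original
# script). It assumes `adj` is the undirected adjacency matrix built earlier, so
# `fit_transform` returns a single array of shape (n_verts, k + 1).
# %% first-dimension drop (illustrative sketch)
drop_first = True  # hypothetical toggle for the option described above

ase_latent = ase.fit_transform(adj)
if drop_first:
    # discard the first (largest) spectral dimension, keeping the remaining k
    ase_latent = ase_latent[:, 1:]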
    # GMM likelihood
    score = gmm.model_.score(latent)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "GMM likelihood"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # GMM BIC
    score = gmm.model_.bic(latent)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "GMM BIC"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # SBM likelihood
    sbm = SBMEstimator(directed=True, loops=False)
    sbm.fit(bin_adj, y=pred_labels)
    score = sbm.score(bin_adj)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "SBM likelihood"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # DCSBM likelihood
    dcsbm = DCSBMEstimator(directed=True, loops=False)
    dcsbm.fit(bin_adj, y=pred_labels)
    score = dcsbm.score(bin_adj)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "DCSBM likelihood"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)
Example #15
    sharex=True,
)
fg = fg.map(sns.distplot, "Signal flow")
fg.set(yticks=())
stashfig("sf-dists-separate")

fg = sns.FacetGrid(total_df,
                   col="Input",
                   aspect=2,
                   hue="Block",
                   margin_titles=True,
                   sharex=True)
fg = fg.map(sns.distplot, "Signal flow")
fg.add_legend()
fg.set(yticks=())
stashfig("sf-dists-squished")

# %%
from graspy.models import SBMEstimator

sbm = SBMEstimator()
sbm.fit(A)
B_hat = sbm.block_p_

sns.heatmap(B_hat)

z = signal_flow(B_hat)
sort_inds = np.argsort(z)[::-1]
plt.figure()
sns.heatmap(B_hat[np.ix_(sort_inds, sort_inds)])
Example #16
#%%
from graspy.models import SBMEstimator
from src.data import load_new_left
from graspy.plot import heatmap
import numpy as np

adj, labels = load_new_left()

sbm = SBMEstimator(loops=False, co_block=False)
sbm.fit(adj, y=labels)
heatmap(sbm.p_mat_, inner_hier_labels=labels, vmin=0, vmax=1)

#%%
co_labels = np.stack((labels, labels), axis=1).astype("U3")

for i, row in enumerate(co_labels):
    if row[1] == "O" or row[1] == "I":
        co_labels[i, 1] = "O/I"
co_labels

#%%
cosbm = SBMEstimator(loops=False, co_block=True)
cosbm.fit(adj, y=co_labels)
heatmap(cosbm.p_mat_, inner_hier_labels=labels)

#%%
Example #17
# %% [markdown]
# ##

from graspy.models import SBMEstimator

n_row = 3
n_col = 7
scale = 10
# figsize is (width, height), so scale by the number of columns, then rows
fig, axs = plt.subplots(n_row, n_col, figsize=(n_col * scale, n_row * scale))

for level in range(8):
    label_name = f"lvl{level}_labels_side"
    sbm = SBMEstimator(directed=True, loops=True)
    sbm.fit(binarize(full_adj), full_meta[label_name].values)
    ax = axs[1, level]
    _, _, top, _ = adjplot(
        sbm.p_mat_,
        ax=ax,
        plot_type="heatmap",
        sort_class=["hemisphere"] + level_names[: level + 1],
        item_order=["merge_class_sf_order", "merge_class", "sf"],
        class_order="sf",
        meta=full_mg.meta,
        palette=CLASS_COLOR_DICT,
        colors="merge_class",
        ticks=False,
        gridline_kws=dict(linewidth=0.05, color="grey", linestyle="--"),
        cbar_kws=dict(shrink=0.6),
Example #18
class_labels = class_labels[not_pendant_inds]
skeleton_labels = skeleton_labels[not_pendant_inds]

file_loc = "maggot_models/notebooks/outs/39.2-BDP-unbiased-clustering/objs/gmm-k18-AutoGMM-LSE-right-ad-PTR-raw.pickle"
gmm = pickle.load(open(file_loc, "rb"))

# # %% [markdown]
# # #
node_signal_flow = signal_flow(adj)
mean_sf = np.zeros(k)
for i in np.unique(pred_labels):
    inds = np.where(pred_labels == i)[0]
    mean_sf[i] = np.mean(node_signal_flow[inds])

cluster_mean_latent = gmm.model_.means_[:, 0]
block_probs = SBMEstimator().fit(bin_adj, y=pred_labels).block_p_
block_prob_df = pd.DataFrame(data=block_probs,
                             index=range(k),
                             columns=range(k))
block_g = nx.from_pandas_adjacency(block_prob_df, create_using=nx.DiGraph)
plt.figure(figsize=(10, 10))
# don't ever let em tell you you're too pythonic
pos = dict(zip(range(k), zip(cluster_mean_latent, mean_sf)))
# nx.draw_networkx_nodes(block_g, pos=pos)
labels = nx.get_edge_attributes(block_g, "weight")
# nx.draw_networkx_edge_labels(block_g, pos, edge_labels=labels)
from matplotlib.cm import ScalarMappable
import matplotlib as mpl

norm = mpl.colors.LogNorm(vmin=0.01, vmax=0.1)
Example #19
plt.style.use("seaborn-white")
right_graph, right_labels = load_right()

np.random.seed(8888)
n_init = 200
clip = 1 / (right_graph.size - right_graph.shape[0])
heatmap_kws = dict(vmin=0,
                   vmax=1,
                   font_scale=1.5,
                   hier_label_fontsize=20,
                   cbar=False)

fig, ax = plt.subplots(4, 2, figsize=(15, 30))

# A priori SBM
ap_estimator = SBMEstimator()
ap_estimator.fit(right_graph, y=right_labels)

lik = ap_estimator.score(right_graph, clip=clip)

heatmap(
    right_graph,
    inner_hier_labels=right_labels,
    title="Right MB (by cell type)",
    ax=ax[0, 0],
    **heatmap_kws,
)
heatmap(
    ap_estimator.p_mat_,
    inner_hier_labels=right_labels,
    title=f"A priori SBM, lik = {lik:.2f}",
Example #20
def select_sbm(
    graph,
    param_grid,
    directed=True,
    co_block=False,
    metric="mse",
    c=0,
    rank="full",
    n_jobs=1,
    n_init=1,
):
    """sweeps over n_components, n_blocks, fits an sbm for each 
    Using GaussianCluster, so will internally sweep covariance structure and pick best

    Returns n_params for the gaussian
    N_params for the sbm kinda
    rss
    score

    Maybe at some point this will sweep rank of B

    Parameters
    ----------
    graph : [type]
        [description]
    n_block_try_range : [type]
        [description]
    n_components_try_range : [type]
        [description]
    directed : bool, optional
        [description], by default False
    """

    # common parameters of all estimators
    sbm = SBMEstimator(directed=directed,
                       loops=False,
                       co_block=co_block,
                       metric=metric,
                       rank=rank)

    # define scoring functions to evaluate models
    scorers = gen_scorers(sbm, graph)

    # run the grid search
    grid_search = GridSearchUS(
        sbm,
        param_grid,
        scoring=scorers,
        n_jobs=n_jobs,
        verbose=0,
        refit=False,
        n_init=n_init,
    )
    grid_search.fit(graph)

    out_df = grid_search.cv_results_
    # out_df["param_regularizer"] = [
    #     v["regularizer"] for v in out_df["param_embed_kws"].values
    # ]
    # out_dict = {}
    # for i, n_components_try in enumerate(n_components_try_range):
    #     for j, n_block_try in enumerate(n_block_try_range):
    #         # check special case for ER, don't need to cluster
    #         if n_block_try == 1:
    #             vertex_assignments = np.zeros(graph.shape[0])
    #             n_params_gmm = 1
    #         else:
    #             vertex_assignments, n_params_gmm = estimate_assignments(
    #                 graph, n_block_try, n_components_try, method=method, metric=metric
    #             )

    #         if rank == "sweep":
    #             rank_try_range = list(range(1, n_block_try + 1))
    #         else:
    #             rank_try_range = [n_block_try]

    #         for k, rank_try in enumerate(rank_try_range):
    #             ind = i * len(n_block_try_range) + j * len(rank_try_range) + k

    #             estimator = SBMEstimator(directed=directed, loops=False, rank=rank_try)
    #             estimator.fit(graph, y=vertex_assignments)

    #             rss = compute_rss(estimator, graph)
    #             mse = compute_mse(estimator, graph)
    #             score = np.sum(estimator.score_samples(graph, clip=c))
    #             n_params_sbm = estimator._n_parameters()
    #             # account for the estimated positions
    #             if type(estimator) == SBMEstimator:
    #                 n_params_sbm += estimator.block_p_.shape[0] - 1

    #             out_dict[ind] = {
    #                 "n_params_gmm": n_params_gmm,
    #                 "n_params_sbm": n_params_sbm,
    #                 "rss": rss,
    #                 "mse": mse,
    #                 "score": score,
    #                 "n_components_try": n_components_try,
    #                 "n_block_try": n_block_try,
    #                 "rank_try": rank_try,
    #             }
    # out_df = pd.DataFrame.from_dict(out_dict, orient="index")
    return out_df
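
# Conceptual sketch (illustration only, not the implementation above): the kind of sweep
# that select_sbm automates can be written by hand with the public graspy API, fitting an
# unsupervised SBM for each candidate number of blocks and scoring it. The graph sampled
# below is only a stand-in for demonstration.
if __name__ == "__main__":
    import numpy as np
    import pandas as pd
    from graspy.models import SBMEstimator
    from graspy.simulations import sbm as sample_sbm

    demo_graph = sample_sbm([50, 50], np.array([[0.5, 0.1], [0.1, 0.5]]), directed=True)
    rows = []
    for k in range(2, 6):
        est = SBMEstimator(directed=True, loops=False, min_comm=k, max_comm=k)
        est.fit(demo_graph)  # unsupervised: blocks estimated by embedding + clustering
        rows.append(
            {"n_blocks": k, "n_params": est._n_parameters(), "score": est.score(demo_graph)}
        )
    manual_sweep_df = pd.DataFrame(rows)
    print(manual_sweep_df)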
Example #21
    def test_SBM_inputs(self):
        with pytest.raises(TypeError):
            SBMEstimator(directed="hey")

        with pytest.raises(TypeError):
            SBMEstimator(loops=6)

        with pytest.raises(TypeError):
            SBMEstimator(n_components="XD")

        with pytest.raises(ValueError):
            SBMEstimator(n_components=-1)

        with pytest.raises(TypeError):
            SBMEstimator(min_comm="1")

        with pytest.raises(ValueError):
            SBMEstimator(min_comm=-1)

        with pytest.raises(TypeError):
            SBMEstimator(max_comm="ay")

        with pytest.raises(ValueError):
            SBMEstimator(max_comm=-1)

        with pytest.raises(ValueError):
            SBMEstimator(min_comm=4, max_comm=2)

        graph = er_np(100, 0.5)
        bad_y = np.zeros(99)
        sbe = SBMEstimator()
        with pytest.raises(ValueError):
            sbe.fit(graph, y=bad_y)

        with pytest.raises(ValueError):
            sbe.fit(graph[:, :99])

        with pytest.raises(ValueError):
            sbe.fit(graph[..., np.newaxis])

        with pytest.raises(TypeError):
            SBMEstimator(cluster_kws=1)

        with pytest.raises(TypeError):
            SBMEstimator(embed_kws=1)
Example #22
    hue="n_block_try",
    palette=cmap,
    **plt_kws,
)

plt.xlabel("# Params (SBM params for SBMs)")
plt.ylabel("MSE")
plt.title(f"Drosophila old MB left, directed ({experiment}:{run})")
plt.savefig(save_dir / "rank_sbm_Klines.pdf", format="pdf", facecolor="w")

#%%
from graspy.models import SBMEstimator
from graspy.datasets import load_drosophila_left, load_drosophila_right
from graspy.utils import binarize

sbm = SBMEstimator(directed=True, loops=False)
left_adj, left_labels = load_drosophila_left(return_labels=True)
left_adj = binarize(left_adj)
sbm.fit(left_adj, y=left_labels)
sbm.mse(left_adj)
sbm._n_parameters()

right_adj, right_labels = load_drosophila_right(return_labels=True)

er = SBMEstimator(directed=True, loops=False, n_blocks=2)
er.fit(left_adj)
er.mse(left_adj)
heatmap(left_adj,
        inner_hier_labels=er.vertex_assignments_,
        outer_hier_labels=left_labels)
#%%