示例#1
0
 def test_DCER_fit(self):
     """Fit a DCSBM without labels and compare its P matrix to the fixture's."""
     # Seed NumPy's global RNG so any randomness used while fitting is repeatable.
     np.random.seed(8888)
     expected_p, adjacency = self.p_mat, self.graph
     estimator = DCSBMEstimator(directed=True, loops=False)
     estimator.fit(adjacency)
     # Entry-wise agreement within an absolute tolerance of 0.12.
     assert_allclose(expected_p, estimator.p_mat_, atol=0.12)
示例#2
0
 def test_DCSBM_fit_supervised(self):
     """Fit a DCSBM with the fixture labels and check the estimated P matrix."""
     expected_p = self.p_mat
     model = DCSBMEstimator(directed=True, loops=False)
     # Labels are passed as ``y``, so the fit is given the block assignments.
     model.fit(self.g, y=self.labels)
     # Entry-wise agreement within an absolute tolerance of 0.1.
     assert_allclose(model.p_mat_, expected_p, atol=0.1)
示例#3
0
    def test_DCSBM_score(self):
        """Run the shared score check, then require an error on a sliced graph."""
        expected_p, adjacency = self.p_mat, self.g
        model = DCSBMEstimator()
        _test_score(model, expected_p, adjacency)

        # Passing a sub-matrix sliced from the fixture graph to ``score_samples``
        # must raise ValueError.
        expect_value_error = pytest.raises(ValueError)
        with expect_value_error:
            model.score_samples(graph=adjacency[1:100, 1:100])
示例#4
0
    def test_DCSBM_fit_unsupervised(self):
        """Sample a three-block degree-corrected graph and fit it without labels.

        Asserts that the inferred vertex assignments reach an adjusted Rand
        score above 0.95 against the generating labels, and that the estimated
        P matrix is within 0.12 (absolute) of the matrix used for sampling.
        """
        np.random.seed(12345)
        n_verts = 1500
        block_sizes = np.array([500, 500, 500])
        block_probs = np.array(
            [[0.7, 0.1, 0.1], [0.1, 0.9, 0.1], [0.05, 0.1, 0.75]]
        )

        # Per-vertex weights; this is the first draw from the seeded RNG.
        degree_weights = np.random.beta(4, 1, n_verts)
        true_labels = _n_to_labels(block_sizes)

        # Expand block probabilities to a full matrix, scale each entry by the
        # product of its endpoints' weights, then zero the diagonal.
        true_p = _block_to_full(block_probs, true_labels, (n_verts, n_verts))
        true_p = true_p * np.outer(degree_weights, degree_weights)
        true_p -= np.diag(np.diag(true_p))

        adjacency = sample_edges(true_p, directed=True, loops=False)
        model = DCSBMEstimator(directed=True, loops=False)
        model.fit(adjacency)

        ari = adjusted_rand_score(true_labels, model.vertex_assignments_)
        assert ari > 0.95
        assert_allclose(true_p, model.p_mat_, atol=0.12)
示例#5
0
def _fit_and_save(estimator, graph, labels, name, show_n_params=False):
    """Fit ``estimator`` a priori on ``graph``/``labels`` and save the result.

    The value returned by ``fit_a_priori`` is written with ``save_obj`` to
    ``file_obs`` under ``name``. When ``show_n_params`` is true, the result's
    ``"n_params"`` entry is printed before saving.
    """
    result_df = fit_a_priori(estimator, graph, labels)
    if show_n_params:
        print(result_df["n_params"])
    save_obj(result_df, file_obs, name)


def _fit_side(load_graph, side, directed, verbose=False):
    """Load one side's graph and fit/save the SBM, DCSBM and dDCSBM for it.

    ``load_graph`` is a zero-argument loader returning ``(graph, labels)``.
    ``side`` is spliced into the saved object names (``"sbm_<side>_df"`` etc.).
    ``verbose`` enables the debug prints (labels and the SBM's ``n_params``).
    """
    graph, labels = load_graph()
    if verbose:
        print(labels)
    if not directed:
        graph = symmetrize(graph, method="avg")

    # NOTE(review): every estimator is built with directed=True even when the
    # graph was symmetrized above; this mirrors the existing behavior. Confirm
    # whether `directed` should be forwarded to the estimators.
    _fit_and_save(
        SBMEstimator(directed=True, loops=False),
        graph,
        labels,
        "sbm_" + side + "_df",
        show_n_params=verbose,
    )
    _fit_and_save(
        DCSBMEstimator(directed=True, loops=False, degree_directed=False),
        graph,
        labels,
        "dcsbm_" + side + "_df",
    )
    _fit_and_save(
        DCSBMEstimator(directed=True, loops=False, degree_directed=True),
        graph,
        labels,
        "ddcsbm_" + side + "_df",
    )


def run_fit(seed, directed):
    """Fit a priori SBM, DCSBM and dDCSBM models on the left and right graphs.

    For each side the graph and labels are loaded, the graph is symmetrized
    (``method="avg"``) when ``directed`` is falsy, and the three models are fit
    with ``fit_a_priori`` and saved via ``save_obj`` as ``sbm_<side>_df``,
    ``dcsbm_<side>_df`` and ``ddcsbm_<side>_df``. The left side is processed
    first and additionally prints its labels and the SBM's ``n_params``.

    Parameters
    ----------
    seed
        Not used inside this function; kept so existing callers keep working.
    directed : bool
        When falsy, each loaded graph is symmetrized before fitting.

    Returns
    -------
    int
        Always ``0``.
    """
    _fit_side(load_left, "left", directed, verbose=True)
    _fit_side(load_right, "right", directed)
    return 0
示例#6
0
def select_dcsbm(
    graph,
    param_grid,
    directed=True,
    degree_directed=False,
    metric="mse",
    c=0,
    rank="full",
    n_jobs=1,
    n_init=1,
):
    """Grid-search DCSBM settings on ``graph`` and return the search results.

    Parameters
    ----------
    graph
        Graph passed to the scorer factory and to ``GridSearchUS.fit``.
    param_grid
        Parameter grid handed to ``GridSearchUS``.
    directed, degree_directed, metric
        Forwarded to the ``DCSBMEstimator`` constructor (``loops`` is fixed to
        ``False``).
    c, rank
        Accepted but not used anywhere in this function.
    n_jobs, n_init
        Forwarded to ``GridSearchUS``.

    Returns
    -------
    The search's ``cv_results_`` object. When it has a ``"param_embed_kws"``
    column, a ``"param_regularizer"`` column is added holding each entry's
    ``"regularizer"`` value.
    """
    # Estimator carrying the settings shared by every grid point.
    base_estimator = DCSBMEstimator(
        directed=directed,
        degree_directed=degree_directed,
        loops=False,
        metric=metric,
    )

    # Scorers are generated from the base estimator and the graph, then the
    # search is run without refitting a final model.
    search = GridSearchUS(
        base_estimator,
        param_grid,
        scoring=gen_scorers(base_estimator, graph),
        n_jobs=n_jobs,
        verbose=0,
        refit=False,
        n_init=n_init,
    )
    search.fit(graph)

    results = search.cv_results_
    if "param_embed_kws" not in results.columns:
        return results

    # Surface the regularizer stored inside each embed_kws dict as its own column.
    regularizers = []
    for embed_kws in results["param_embed_kws"].values:
        regularizers.append(embed_kws["regularizer"])
    results["param_regularizer"] = regularizers
    return results
示例#7
0
    def test_DCSBM_nparams(self):
        """Check ``_n_parameters`` across four ways of configuring/fitting a DCSBM."""
        n_class = 4
        n_verts = 3000
        adjacency, true_labels = self.g, self.labels
        n_block_entries = n_class**2

        # Directed, fit without labels: the expected count carries an extra
        # ``n_class - 1`` term compared with the labelled fit below.
        model = DCSBMEstimator(directed=True)
        model.fit(adjacency)
        expected = n_verts + n_class - 1 + n_block_entries
        assert model._n_parameters() == expected

        # Directed, fit with labels.
        model = DCSBMEstimator(directed=True)
        model.fit(adjacency, y=true_labels)
        expected = n_verts + n_block_entries
        assert model._n_parameters() == expected

        # Directed with degree_directed=True: the per-vertex term doubles.
        model = DCSBMEstimator(directed=True, degree_directed=True)
        model.fit(adjacency, y=true_labels)
        expected = 2 * n_verts + n_block_entries
        assert model._n_parameters() == expected

        # Undirected, fit with labels. The literal 10 presumably corresponds to
        # n_class * (n_class + 1) / 2 for n_class == 4 -- TODO confirm.
        model = DCSBMEstimator(directed=False)
        model.fit(adjacency, y=true_labels)
        expected = n_verts + 10
        assert model._n_parameters() == expected
示例#8
0
    def test_DCSBM_sample(self):
        """Exercise ``sample`` error handling, then run the shared sampling check."""
        np.random.seed(8888)
        model = DCSBMEstimator(directed=True, loops=False)

        # Build a two-block probability matrix scaled by per-vertex weights,
        # with a zeroed diagonal, and sample a directed graph from it.
        block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])
        degree_weights = np.random.uniform(0.25, 0.75, size=100)
        communities = _n_to_labels([50, 50])
        true_p = _block_to_full(block_probs, communities, (100, 100))
        true_p = true_p * np.outer(degree_weights, degree_weights)
        true_p -= np.diag(np.diag(true_p))
        adjacency = sample_edges(true_p, directed=True)

        # Sampling before fitting must raise NotFittedError.
        with pytest.raises(NotFittedError):
            model.sample()

        model.fit(adjacency, y=communities)

        # Invalid ``n_samples`` values: a negative count, then a non-numeric one.
        invalid_n_samples = ((-1, ValueError), ("nope", TypeError))
        for bad_value, expected_error in invalid_n_samples:
            with pytest.raises(expected_error):
                model.sample(n_samples=bad_value)

        # Replace the fitted matrix with the generating one before the shared
        # sampling check is run against that same matrix.
        model.p_mat_ = true_p
        _test_sample(model, true_p, n_samples=1000, atol=0.1)
示例#9
0
    def test_DCSBM_inputs(self):
        """Check that invalid constructor arguments and fit inputs raise."""
        # (expected exception, constructor kwargs), checked in this order.
        bad_constructor_args = (
            (TypeError, {"directed": "hey"}),
            (TypeError, {"loops": 6}),
            (TypeError, {"n_components": "XD"}),
            (ValueError, {"n_components": -1}),
            (TypeError, {"min_comm": "1"}),
            (ValueError, {"min_comm": -1}),
            (TypeError, {"max_comm": "ay"}),
            (ValueError, {"max_comm": -1}),
            (ValueError, {"min_comm": 4, "max_comm": 2}),
        )
        for expected_error, kwargs in bad_constructor_args:
            with pytest.raises(expected_error):
                DCSBMEstimator(**kwargs)

        adjacency = er_np(100, 0.5)
        short_labels = np.zeros(99)
        model = DCSBMEstimator()

        # Label vector with 99 entries for the graph generated by er_np(100, 0.5).
        with pytest.raises(ValueError):
            model.fit(adjacency, y=short_labels)

        # Graph with one column sliced off.
        with pytest.raises(ValueError):
            model.fit(adjacency[:, :99])

        # Graph with an extra trailing axis.
        with pytest.raises(ValueError):
            model.fit(adjacency[..., np.newaxis])

        # Keyword-argument containers must not be plain integers.
        for kws_name in ("cluster_kws", "embed_kws"):
            with pytest.raises(TypeError):
                DCSBMEstimator(**{kws_name: 1})
示例#10
0
        vmax=1,
        font_scale=1.5,
        title="SBM probability matrix",
        sort_nodes=True)

# Write the current pyplot figure to "SBMProbabilityMatrix" with a tight
# bounding box (presumably the heatmap whose call ends just above -- the start
# of that call is outside this excerpt).
plt.savefig("SBMProbabilityMatrix", bbox_inches='tight')

# Plot the first element of what the fitted SBM's sample() returns.
heatmap(sbme.sample()[0],
        inner_hier_labels=labels,
        font_scale=1.5,
        title="SBM sample",
        sort_nodes=True)

plt.savefig("SBMSample", bbox_inches='tight')

# Fit a DCSBM on the same adjacency matrix using the provided labels, then
# print its block probability attribute.
dcsbme = DCSBMEstimator(directed=True,loops=False)
dcsbme.fit(adj, y=labels)
print("DCSBM \"B\" matrix:")
print(dcsbme.block_p_)
# Plot the fitted probability matrix with the color scale pinned to [0, 1].
heatmap(dcsbme.p_mat_,
        inner_hier_labels=labels,
        font_scale=1.5,
        title="DCSBM probability matrix",
        vmin=0,
        vmax=1,
        sort_nodes=True)

plt.savefig("DCSBMProbabilityMatrix", bbox_inches='tight')

heatmap(dcsbme.sample()[0],
        inner_hier_labels=labels,
示例#11
0
    right_graph,
    inner_hier_labels=pred_labels,
    title="Right MB (by SBM block)",
    ax=ax[1, 0],
    **heatmap_kws,
)
# Plot the probability matrix of the model held by `gs`, grouped by the
# predicted labels; `lik` is computed above this excerpt.
heatmap(
    gs.model_.p_mat_,
    inner_hier_labels=pred_labels,
    title=f"Fit SBM, lik = {lik:.2f}",
    ax=ax[1, 1],
    **heatmap_kws,
)

# A priori DCSBM
# Fit with the known right-side labels and score the same graph; `lik` is
# rebound here and used in the title of a later plot.
ap_estimator = DCSBMEstimator()
ap_estimator.fit(right_graph, y=right_labels)
lik = ap_estimator.score(right_graph, clip=clip)
# Plot the observed graph grouped by `right_labels` in the third row, first column.
heatmap(
    right_graph,
    inner_hier_labels=right_labels,
    title="Right MB (by cell type)",
    ax=ax[2, 0],
    **heatmap_kws,
)
heatmap(
    ap_estimator.p_mat_,
    inner_hier_labels=right_labels,
    title=f"A priori DCSBM, lik = {lik:.2f}",
    ax=ax[2, 1],
    **heatmap_kws,
示例#12
0
import matplotlib as mpl

import numpy as np

from graspy.models import DCSBMEstimator, RDPGEstimator, SBMEstimator
from graspy.plot import heatmap
from src.data import load_right

# Load data
right_adj, right_labels = load_right()

# Fit the models
# SBM and DCSBM are fit with the loaded labels; the RDPG is fit on the
# adjacency matrix alone with 3 components.
sbm = SBMEstimator(directed=True, loops=False)
sbm.fit(right_adj, y=right_labels)

dcsbm = DCSBMEstimator(degree_directed=False, directed=True, loops=False)
dcsbm.fit(right_adj, y=right_labels)

rdpg = RDPGEstimator(loops=False, n_components=3)
rdpg.fit(right_adj)

# Plotting
# Seed NumPy's global RNG before the plotting section.
np.random.seed(8888)

# Build a 256-color colormap from the upper half (0.5 to 1) of "RdBu_r".
# center / vmin / vmax / norm are not used in the lines visible here;
# presumably they are consumed by plotting code further down -- TODO confirm.
cmap = mpl.cm.get_cmap("RdBu_r")
center = 0
vmin = 0
vmax = 1
norm = mpl.colors.Normalize(0, 1)
cc = np.linspace(0.5, 1, 256)
cmap = mpl.colors.ListedColormap(cmap(cc))
示例#13
0
#%%
# Cell: fit a degree-directed DCSBM on the left graph with its labels and
# evaluate its `mse` on the same graph (the value is not stored).
from src.data import load_left
from graspy.models import DCSBMEstimator

graph, labels = load_left()

dcsbm = DCSBMEstimator(directed=True, loops=False, degree_directed=True)
dcsbm.fit(graph, y=labels)
dcsbm.mse(graph)
#%%
# Cell: run RDPG model selection over n_components in 1..4.
# NOTE(review): `n_init` and the GridSearchUS import are not used in the
# lines visible here.
from src.models import GridSearchUS
from src.models import select_rdpg

n_init = 3
n_components_try = range(1, 5)
param_grid = dict(n_components=n_components_try)
select_rdpg(graph, param_grid)

#%%
# Cell: compare graspy's cartprod with itertools.product on the same inputs.
from graspy.utils import cartprod
import numpy as np

s = range(20, 25)
f = np.random.uniform(size=5)

out = cartprod(s, f)
from itertools import product

# `out` is rebound to the itertools iterator, discarding the cartprod result.
out = product(s, f)
# Prints the first element of every (s, f) pair, so each value of `s` is
# printed once per element of `f`.
for i, j in product(s, f):
    print(i)
示例#14
0
from graspy.utils import remove_loops

# Keep the first `d` columns of the embedding.
X = ase_flat_embed[:, :d]
# Half the number of rows; presumably rows are ordered as left nodes followed
# by their right partners -- TODO confirm against how `new_adj` was built.
n_pairs = len(X) // 2
# Indices 0..n_pairs-1 and n_pairs..2*n_pairs-1 respectively.
new_lp_inds = np.arange(n_pairs)
new_rp_inds = np.arange(n_pairs).copy() + n_pairs

# Accumulates one dict of scores per evaluation in the loop below.
rows = []
for l in range(n_levels):
    labels = new_meta[f"lvl{l}_labels"].values
    left_adj = binarize(new_adj[np.ix_(new_lp_inds, new_lp_inds)])
    left_adj = remove_loops(left_adj)
    right_adj = binarize(new_adj[np.ix_(new_rp_inds, new_rp_inds)])
    right_adj = remove_loops(right_adj)

    dcsbm = DCSBMEstimator(directed=True, loops=False)
    uni_labels, inv = np.unique(labels, return_inverse=True)
    dcsbm.fit(left_adj, inv[new_lp_inds])
    train_left_p = dcsbm.p_mat_
    train_left_p[train_left_p == 0] = 1 / train_left_p.size

    score = poisson.logpmf(left_adj, train_left_p).sum()
    rows.append(
        dict(train_side="left",
             test="same",
             test_side="left",
             score=score,
             level=l))
    score = poisson.logpmf(right_adj, train_left_p).sum()
    rows.append(
        dict(train_side="left",
示例#15
0
# %% [markdown]
# ##

# %% [markdown]
# ##

# %% [markdown]
# ##

pairplot(embed, labels=pred_labels, palette=cc.glasbey_light)

# %% [markdown]
# ##

# NOTE(review): despite the name, `sbm` holds a DCSBMEstimator.
# It is fit without labels on the binarized adjacency, with max_comm=30.
sbm = DCSBMEstimator(directed=True,
                     degree_directed=True,
                     loops=False,
                     max_comm=30)
sbm.fit(binarize(adj))
pred_labels = sbm.vertex_assignments_
# Number of distinct predicted labels.
print(len(np.unique(pred_labels)))

meta["pred_labels"] = pred_labels

# Draw from the fitted model and drop singleton dimensions.
graph = np.squeeze(sbm.sample())

# Negated signal flow of the binarized adjacency, stored per row of `meta`.
meta["adj_sf"] = -signal_flow(binarize(adj))

# Negated signal flow of the block probability matrix; the Series gets the
# default 0..k-1 index, so mapping `pred_labels` through it assigns each row
# the value for its predicted label (assumes labels are those integer
# positions -- TODO confirm).
block_sf = -signal_flow(sbm.block_p_)
block_map = pd.Series(data=block_sf)
meta["block_sf"] = meta["pred_labels"].map(block_map)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "GMM BIC"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # SBM likelihood
    sbm = SBMEstimator(directed=True, loops=False)
    sbm.fit(bin_adj, y=pred_labels)
    score = sbm.score(bin_adj)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "SBM likelihood"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # DCSBM likelihood
    dcsbm = DCSBMEstimator(directed=True, loops=False)
    dcsbm.fit(bin_adj, y=pred_labels)
    score = dcsbm.score(bin_adj)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "DCSBM likelihood"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # ARI of the subset with labels
    score = sub_ari(known_inds, class_labels, pred_labels)
    temp_dict = base_dict.copy()
    temp_dict["Metric"] = "Simple ARI"
    temp_dict["Score"] = score
    out_dicts.append(temp_dict)

    # ARI vs K - 1
示例#17
0
# Second axis: scatter-style plot of the observed adjacency, sorted by
# hemisphere and level-0 label.
ax = axs[1]
adjplot(
    adj,
    meta=meta,
    sort_class=["hemisphere", "lvl0_labels"],
    colors="merge_class",
    palette=CLASS_COLOR_DICT,
    class_order=["signal_flow"],
    item_order=["te"],
    plot_type="scattermap",
    sizes=(0.5, 0.5),
    ax=ax,
    ticks=False,
)

# Fit a degree-directed DCSBM using the level-0 labels, then draw a sample
# from it and drop singleton dimensions; the sample goes on the first axis.
estimator = DCSBMEstimator(degree_directed=True, directed=True, loops=False)
estimator.fit(adj, meta["lvl0_labels"].values)
sample = np.squeeze(estimator.sample())
ax = axs[0]
adjplot(
    sample,
    meta=meta,
    sort_class=["hemisphere", "lvl0_labels"],
    colors="merge_class",
    palette=CLASS_COLOR_DICT,
    class_order=["signal_flow"],
    item_order=["te"],
    plot_type="scattermap",
    sizes=(0.5, 0.5),
    ax=ax,
    ticks=False,
示例#18
0
 def test_DCSBM_score(self):
     """Run the shared score check on a default-constructed DCSBMEstimator."""
     expected_p, adjacency = self.p_mat, self.g
     # All validation happens in the shared helper.
     _test_score(DCSBMEstimator(), expected_p, adjacency)