Example #1
def bilateral_ase(adj):
    ase = AdjacencySpectralEmbed(n_components=None,
                                 n_elbows=2,
                                 check_lcc=False)
    ipsi_adj = adj.copy()
    ipsi_adj[np.ix_(left_inds, right_inds)] = 0
    ipsi_adj[np.ix_(right_inds, left_inds)] = 0
    ipsi_embed = ase.fit_transform(ipsi_adj)

    procrust = Procrustes()
    align_ipsi_embed = []
    for e in ipsi_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_ipsi_embed.append(align_e)
    align_ipsi_embed = np.concatenate(align_ipsi_embed, axis=1)

    contra_adj = adj.copy()
    contra_adj[np.ix_(left_inds, left_inds)] = 0
    contra_adj[np.ix_(right_inds, right_inds)] = 0
    contra_embed = ase.fit_transform(contra_adj)

    procrust = Procrustes()
    align_contra_embed = []
    for e in contra_embed:
        procrust.fit(e, x_seeds=lp_inds, y_seeds=rp_inds)
        align_e = procrust.transform(e, map_inds=left_inds)
        align_contra_embed.append(align_e)
    align_contra_embed = np.concatenate(align_contra_embed, axis=1)
    return align_ipsi_embed, align_contra_embed
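Note: Procrustes here is a project-local helper rather than a graspy class, and left_inds, right_inds, lp_inds, and rp_inds are module-level index arrays. A minimal sketch of the same seeded-alignment idea using scipy's orthogonal_procrustes, with hypothetical paired indices on a synthetic graph:

import numpy as np
from scipy.linalg import orthogonal_procrustes
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import sbm

adj = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]], directed=True)
left_inds = np.arange(50)                 # hypothetical left-hemisphere indices
right_inds = np.arange(50, 100)           # hypothetical right-hemisphere indices
lp_inds, rp_inds = left_inds, right_inds  # assume nodes are paired in order

# directed ASE returns (out, in) latent positions; stack them into one matrix
out_latent, in_latent = AdjacencySpectralEmbed(n_components=2).fit_transform(adj)
embed = np.concatenate((out_latent, in_latent), axis=1)

# learn a rotation from the paired "seed" nodes, then apply it to the left side
R, _ = orthogonal_procrustes(embed[lp_inds], embed[rp_inds])
embed[left_inds] = embed[left_inds] @ R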
Example #2
def ase(adj, n_components, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(adj)
    latent = np.concatenate(latent, axis=-1)
    return latent
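Note: this helper assumes a directed graph. There, fit_transform returns an (out, in) tuple, which the concatenate stacks column-wise; for an undirected graph it returns a single array, and the same call would flatten the rows instead. A quick sketch of the directed convention:

import numpy as np
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import sbm

directed_adj = sbm([50, 50], [[0.5, 0.1], [0.2, 0.5]], directed=True)
latent = AdjacencySpectralEmbed(n_components=3).fit_transform(directed_adj)
print(type(latent))                           # tuple: (out, in) latent positions
print(np.concatenate(latent, axis=-1).shape)  # (100, 6)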
Example #3
class ASEEmbedding(Embedding):
    """Implements an interface for adjacency spectral embedding; inherits from the Embedding class.

    """
    def __init__(self):
        self.model = AdjacencySpectralEmbed()

    def fit(self, X, S=None):
        Xh = np.hstack(self.model.fit_transform(X))
        if S is not None:
            Xh = np.hstack((Xh, S))
        clusterer = GaussianMixture(n_components=Xh.shape[1] // 2)
        clusterer.fit(Xh)
        predict_labels = clusterer.predict(Xh)
        self.y = predict_labels
        self.H = Xh

    def learn_embedding(self, G, S=None, **kwargs):
        X = nx.adjacency_matrix(G)
        X = X.todense()
        Xh = np.hstack(self.model.fit_transform(X))
        if S is not None:
            Xh = np.hstack((Xh, S))
        clusterer = GaussianMixture(n_components=Xh.shape[1] // 2)
        clusterer.fit(Xh)
        predict_labels = clusterer.predict(Xh)
        self.y = predict_labels
        self.H = Xh

    def get_reconstructed_adj(self, *a, **b):
        return self.model.latent_left_.dot(np.diag(
            self.model.singular_values_)).dot(self.model.latent_right_.T)
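Note: graspy only populates latent_right_ when the fitted graph is directed (it is None for undirected input), so get_reconstructed_adj assumes a directed fit. Also, graspy's latent positions already carry a sqrt(singular_values_) scaling, so whether the extra diag(singular_values_) factor is wanted depends on the convention assumed; a sketch of the plain rank-d reconstruction under graspy's scaling:

import numpy as np
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import sbm

adj = sbm([50, 50], [[0.5, 0.1], [0.2, 0.5]], directed=True)
model = AdjacencySpectralEmbed(n_components=2)
model.fit_transform(adj)
p_hat = model.latent_left_ @ model.latent_right_.T  # low-rank estimate of P
print(p_hat.shape)  # (100, 100)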
Example #4
def evaluate_models(graph,
                    labels=None,
                    title=None,
                    plot_graphs=False,
                    min_comp=0,
                    max_comp=1,
                    n_comp=5):

    if plot_graphs:
        heatmap(graph, inner_hier_labels=cell_labels)

    ## Set up models to test
    non_rdpg_models = [
        EREstimator(fit_degrees=False),
        SBEstimator(fit_degrees=False),
        SBEstimator(fit_degrees=True),
    ]

    d = [6]
    rdpg_models = [RDPGEstimator(n_components=i) for i in d]
    models = non_rdpg_models + rdpg_models

    names_nonRDPG = ["ER", "SBM", "DCSBM"]
    names_RDPG = ["RDPGrank{}".format(i) for i in d]
    names = names_nonRDPG + names_RDPG

    bics = []
    log_likelihoods = []

    ## Test models
    for model, name in zip(models, names):
        m = model.fit(graph, y=labels)
        if plot_graphs:
            heatmap(m.p_mat_,
                    inner_hier_labels=labels,
                    title=(name + " P matrix"))
            heatmap(m.sample(),
                    inner_hier_labels=labels,
                    title=(name + " sample"))
        bic = m.bic(graph)
        log_likelihoods.append(m.score(graph))
        bics.append(bic)
        plt.show()
        ase = AdjacencySpectralEmbed(n_components=2)
        latent = ase.fit_transform(m.p_mat_)
        # ASE of a directed P matrix returns an (out, in) tuple; stack it for plotting
        if isinstance(latent, tuple):
            latent = np.concatenate(latent, axis=1)
        ax = scatterplot(latent,
                         labels=cell_labels,
                         height=4,
                         alpha=0.6,
                         font_scale=1.25)
        # plt.suptitle(name, y=0.94, x=0.1, fontsize=30, horizontalalignment="left")
        plt.savefig(name + "latent.png", format="png", dpi=1000)
        plt.close()
Example #5
def lse(adj, n_components, regularizer=None):
    if PTR:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components)
    latent = ase.fit_transform(lap)
    latent = np.concatenate(latent, axis=-1)
    return latent
Example #6
def level(adj, meta, pred, reembed=False, X=None, R=None, plot_all=True):
    uni_labels, inv = np.unique(pred, return_inverse=True)
    all_sub_results = []
    sub_data = []

    for label in uni_labels:
        print(label)
        print()
        label_mask = pred == label
        sub_meta = meta[label_mask].copy()
        sub_meta["inds"] = range(len(sub_meta))
        sub_left_inds = sub_meta[sub_meta["left"]]["inds"].values
        sub_right_inds = sub_meta[sub_meta["right"]]["inds"].values
        sub_lp_inds, sub_rp_inds = get_paired_inds(sub_meta)
        sub_adj = adj[np.ix_(label_mask, label_mask)]

        if reembed:
            ase = AdjacencySpectralEmbed()
            # TODO look into PTR at this level as well
            sub_embed = ase.fit_transform(sub_adj)
            sub_X = np.concatenate(sub_embed, axis=1)
            sub_R, _ = orthogonal_procrustes(sub_X[sub_lp_inds],
                                             sub_X[sub_rp_inds])
            sub_X[sub_left_inds] = sub_X[sub_left_inds] @ sub_R
        else:
            sub_X = X[label_mask].copy()
            sub_R = R

        var_dict = {
            "meta": sub_meta,
            "left_inds": sub_left_inds,
            "right_inds": sub_right_inds,
            "left_pair_inds": sub_lp_inds,
            "right_pair_inds": sub_rp_inds,
            "X": sub_X,
            "adj": sub_adj,
            "reembed": reembed,
        }

        sub_data.append(var_dict)

        sub_results = crossval_cluster(
            sub_X,
            sub_left_inds,
            sub_right_inds,
            left_pair_inds=sub_lp_inds,
            right_pair_inds=sub_rp_inds,
            max_clusters=8,
            min_clusters=1,
            n_init=50,
        )

        fig, axs = plot_metrics(sub_results, plot_all=plot_all)
        fig.suptitle(f"Clustering for cluster {label}, reembed={reembed}")
        stashfig(f"cluster-profile-label={label}-reembed={reembed}")
        plt.close()
        all_sub_results.append(sub_results)
    return all_sub_results, sub_data
Example #7
def normalized_ase(graph, n_components=None, embed_kws=None):
    embed_kws = {} if embed_kws is None else embed_kws
    ase = AdjacencySpectralEmbed(n_components=n_components, **embed_kws)
    latent = ase.fit_transform(graph)
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=-1)
    norm_vec = np.linalg.norm(latent, axis=1)
    norm_vec[norm_vec == 0] = 1
    norm_latent = latent / norm_vec[:, np.newaxis]
    return norm_latent
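Note: dividing each row by its norm projects the latent positions onto the unit sphere, removing degree-like scale so that downstream clustering keys on direction; the zero-row guard avoids division by zero. A quick usage sketch, reusing normalized_ase from above:

import numpy as np
from graspy.simulations import sbm

graph = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]])
norm_latent = normalized_ase(graph, n_components=2)
print(np.allclose(np.linalg.norm(norm_latent, axis=1), 1.0))  # True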
Example #8
def get(n=50):
    ns = [n, n]
    p1 = np.array([[0.9, 0.1], [0.1, 0.9]])
    p2 = np.array([[0.9, 0.1], [0.1, 0.9]])
    A1 = sbm(ns, p1)
    A2 = sbm(ns, p2)
    X1 = AdjacencySpectralEmbed().fit_transform(A1)
    X2 = AdjacencySpectralEmbed().fit_transform(A2)
    return X1, X2
Example #9
def ase_concatenate(adjs, n_components, ptr=True):
    if ptr:
        adjs = [pass_to_ranks(a) for a in adjs]
    ase = AdjacencySpectralEmbed(n_components=n_components // len(adjs))
    graph_latents = []
    for a in adjs:
        latent = ase.fit_transform(a)
        latent = np.concatenate(latent, axis=-1)
        graph_latents.append(latent)
    latent = np.concatenate(graph_latents, axis=-1)
    return latent
Example #10
def lse(adj, n_components, regularizer=None, ptr=True):
    if ptr:
        adj = pass_to_ranks(adj)
    lap = to_laplace(adj, form="R-DAD", regularizer=regularizer)
    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)
    latent = ase.fit_transform(lap)
    # latent = LaplacianSpectralEmbed(
    #     form="R-DAD", n_components=n_components, regularizer=regularizer
    # )
    latent = np.concatenate(latent, axis=-1)
    return latent
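Note: the commented-out block points at the equivalent direct route; LaplacianSpectralEmbed performs the to_laplace and SVD steps internally. A sketch, assuming the regularizer semantics match:

from graspy.embed import LaplacianSpectralEmbed
from graspy.simulations import sbm

adj = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]])
lse_direct = LaplacianSpectralEmbed(form="R-DAD", n_components=4, regularizer=1)
latent = lse_direct.fit_transform(adj)
print(latent.shape)  # (100, 4)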
Example #11
def ase_procrust_svd(embed_adjs):
    ase = AdjacencySpectralEmbed(n_components=None)
    all_embeds = []
    for a in embed_adjs:
        embed = ase.fit_transform(a)
        embed = np.concatenate(embed, axis=1)
        embed[left_inds] = (embed[left_inds] @ orthogonal_procrustes(
            embed[lp_inds], embed[rp_inds])[0])
        print(embed.shape)
        all_embeds.append(embed)
    cat_embed = np.concatenate(all_embeds, axis=1)
    print(cat_embed.shape)
    U, S, Vt = selectSVD(cat_embed, n_elbows=3)
    return U
Example #12
def embed_ase(*, adj, n_components=None):
    """ JHU AdjacencySpectralEmbed, w/ default settings """
    X_ase = AdjacencySpectralEmbed(n_components=n_components).fit_transform(adj.toarray())
    if isinstance(X_ase, tuple):
        X_ase = np.column_stack(X_ase)
    
    return X_ase
Example #13
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    # convert the adjacency and a partition to a minigraph based on SBM probs
    prob_df = get_blockmodel_df(
        adj, labels, return_counts=use_counts, use_weights=use_weights
    )
    if drop_neg and ("-1" in prob_df.index):
        prob_df.drop("-1", axis=0, inplace=True)
        prob_df.drop("-1", axis=1, inplace=True)

    if remove_diag:
        adj = prob_df.values
        adj -= np.diag(np.diag(adj))
        prob_df.loc[:, :] = adj

    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
    uni_labels, counts = np.unique(labels, return_counts=True)

    # add size attribute based on number of vertices
    size_map = dict(zip(uni_labels, size_scaler * counts))
    nx.set_node_attributes(g, size_map, name="Size")

    # add signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    node_signal_flow = signal_flow(mini_adj)
    sf_map = dict(zip(uni_labels, node_signal_flow))
    nx.set_node_attributes(g, sf_map, name="Signal Flow")

    # add spectral properties
    sym_adj = symmetrize(mini_adj)
    n_components = 10
    latent = AdjacencySpectralEmbed(n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        latent_dim = latent[:, i]
        lap_map = dict(zip(uni_labels, latent_dim))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {}
    spring_y = {}
    for key, val in pos.items():
        spring_x[key] = val[0]
        spring_y[key] = val[1]
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
Example #14
    def _embed(self, A1, A2):
        if self.n_components is None:
            num_dims1 = select_dimension(A1)[0][-1]
            num_dims2 = select_dimension(A2)[0][-1]
            self.n_components = max(num_dims1, num_dims2)

        ase = AdjacencySpectralEmbed(n_components=self.n_components)
        X1_hat = ase.fit_transform(A1)
        X2_hat = ase.fit_transform(A2)

        if isinstance(X1_hat, tuple) and isinstance(X2_hat, tuple):
            X1_hat = np.concatenate(X1_hat, axis=-1)
            X2_hat = np.concatenate(X2_hat, axis=-1)
        elif isinstance(X1_hat, tuple) ^ isinstance(X2_hat, tuple):
            msg = ("input graphs do not have same directedness. "
                   "consider symmetrizing the directed graph.")
            raise ValueError(msg)

        return X1_hat, X2_hat
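Note: select_dimension returns the elbow locations of the scree plot (Zhu-Ghodsi), so select_dimension(A)[0][-1] above picks the last elbow as the embedding dimension. A sketch:

from graspy.embed import select_dimension
from graspy.simulations import sbm

A = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]])
elbows, sing_vals = select_dimension(A, n_elbows=2)
print(elbows)  # elbow indices; the code above uses elbows[-1]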
Example #15
def estimate_assignments(graph,
                         n_communities,
                         n_components=None,
                         method="gc",
                         metric=None):
    """Given a graph and n_comunities, sweeps over covariance structures
    Not deterministic
    Not using graph bic or mse to calculate best

    1. Does an embedding on the raw graph
    2. GaussianCluster on the embedding. This will sweep covariance structure for the 
       given n_communities
    3. Returns n_parameters based on the number used in GaussianCluster

    method can be "gc" or "bc" 

    method 
    "gc" : use graspy GaussianCluster
        this defaults to full covariance
    "bc" : tommyclust with defaults
        so sweep covariance, agglom, linkage
    "bc-metric" : tommyclust with custom metric
        still sweep everything
    "bc-none" : mostly for testing, should behave just like GaussianCluster

    """
    embed_graph = graph.copy()
    latent = AdjacencySpectralEmbed(
        n_components=n_components).fit_transform(embed_graph)
    if isinstance(latent, tuple):
        latent = np.concatenate(latent, axis=1)
    if method == "gc":
        gc = GaussianCluster(
            min_components=n_communities,
            max_components=n_communities,
            covariance_type="all",
        )
        vertex_assignments = gc.fit_predict(latent)
        n_params = gc.model_._n_parameters()
    elif method == "bc":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities])
    elif method == "bc-metric":
        vertex_assignments, n_params = brute_cluster(latent, [n_communities],
                                                     metric=metric)
    elif method == "bc-none":
        vertex_assignments, n_params = brute_cluster(
            latent,
            [n_communities],
            affinities=["none"],
            linkages=["none"],
            covariance_types=["full"],
        )
    else:
        raise ValueError("Unspecified clustering method")
    return (vertex_assignments, n_params)
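Note: a minimal sketch of the "gc" path on a synthetic two-block SBM (the brute_cluster branches depend on a project-local tommyclust helper and are not reproduced here):

import numpy as np
from graspy.cluster import GaussianCluster
from graspy.embed import AdjacencySpectralEmbed
from graspy.simulations import sbm

graph = sbm([50, 50], [[0.5, 0.1], [0.1, 0.5]])
latent = AdjacencySpectralEmbed(n_components=2).fit_transform(graph)
gc = GaussianCluster(min_components=2, max_components=2, covariance_type="all")
assignments = gc.fit_predict(latent)
print(np.unique(assignments))  # two clusters found on the embedding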
Example #16
    def _embed(self, adj=None):
        if adj is None:
            adj = self.adj
        # TODO look into PTR at this level as well
        # lp_inds, rp_inds = get_paired_inds(self.meta)
        lp_inds = self.left_pair_inds
        rp_inds = self.right_pair_inds

        embed_adj = pass_to_ranks(adj)
        if self.embed == "ase":
            embedder = AdjacencySpectralEmbed(
                n_components=self.n_components, n_elbows=self.n_elbows
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "lse":
            embedder = LaplacianSpectralEmbed(
                n_components=self.n_components,
                n_elbows=self.n_elbows,
                regularizer=self.regularizer,
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "unscaled_ase":
            embed_adj = pass_to_ranks(adj)
            embed_adj = augment_diagonal(embed_adj)
            embed = selectSVD(
                embed_adj, n_components=self.n_components, n_elbows=self.n_elbows
            )
            embed = (embed[0], embed[2].T)

        X = np.concatenate(embed, axis=1)

        fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
        print(f"Learning transformation with {fraction_paired} neurons paired")
        R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])
        X[self.left_inds] = X[self.left_inds] @ R

        if self.normalize:
            row_sums = np.sum(X, axis=1)
            X /= row_sums[:, None]

        return X
Example #17
    def test_passing_embeddings(self):
        np.random.seed(123)
        A1 = er_np(20, 0.8)
        A2 = er_np(20, 0.8)
        ase_1 = AdjacencySpectralEmbed(n_components=2)
        X1 = ase_1.fit_transform(A1)
        ase_2 = AdjacencySpectralEmbed(n_components=2)
        X2 = ase_2.fit_transform(A2)
        ase_3 = AdjacencySpectralEmbed(n_components=1)
        X3 = ase_3.fit_transform(A2)
        # check embeddings having weird ndim
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, X2.reshape(-1, 1, 1))
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1.reshape(-1, 1, 1), X2)
        # check embeddings having mismatching number of components
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, X3)
        with self.assertRaises(ValueError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X3, X1)
        # check passing weird stuff as input (caught by us)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict("hello there", X1)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, "hello there")
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict({"hello": "there"}, X1)
        with self.assertRaises(TypeError):
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1, {"hello": "there"})
        # check passing infinite in input (caught by check_array)
        with self.assertRaises(ValueError):
            X1_w_inf = X1.copy()
            X1_w_inf[1, 1] = np.inf
            ldt = LatentDistributionTest(input_graph=False)
            ldt.fit_predict(X1_w_inf, X2)
        # check that the appropriate input works
        ldt = LatentDistributionTest(input_graph=False)
        ldt.fit_predict(X1, X2)
Example #18
    def _embed(self, adj=None):
        if adj is None:
            adj = self.adj

        lp_inds = self.left_pair_inds
        rp_inds = self.right_pair_inds

        embed_adj = pass_to_ranks(adj)  # TODO PTR here?
        if self.plus_c:
            embed_adj += 1 / adj.size
        if self.embed == "ase":
            embedder = AdjacencySpectralEmbed(n_components=self.n_components,
                                              n_elbows=self.n_elbows)
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "lse":
            embedder = LaplacianSpectralEmbed(
                n_components=self.n_components,
                n_elbows=self.n_elbows,
                regularizer=self.regularizer,
            )
            embed = embedder.fit_transform(embed_adj)
        elif self.embed == "unscaled_ase":
            embed_adj = augment_diagonal(embed_adj)
            embed = selectSVD(embed_adj,
                              n_components=self.n_components,
                              n_elbows=self.n_elbows)
            embed = (embed[0], embed[2].T)

        X = np.concatenate(embed, axis=1)

        fraction_paired = (len(lp_inds) + len(rp_inds)) / len(self.root_inds)
        print(f"Learning transformation with {fraction_paired} neurons paired")

        X = self._procrustes(X)

        if self.normalize:
            row_norms = np.linalg.norm(X, axis=1)
            X /= row_norms[:, None]

        return X
Example #19
def add_attributes(
    g,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    nodelist = list(g.nodes())

    # add spectral properties
    sym_adj = symmetrize(nx.to_numpy_array(g, nodelist=nodelist))
    n_components = 10
    latent = AdjacencySpectralEmbed(
        n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        latent_dim = latent[:, i]
        lap_map = dict(zip(nodelist, latent_dim))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {}
    spring_y = {}
    for key, val in pos.items():
        spring_x[key] = val[0]
        spring_y[key] = val[1]
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    # nx.set_node_attributes(g, color_map, name="Color")
    for node, data in g.nodes(data=True):
        c = data["cell_class"]
        color = CLASS_COLOR_DICT[c]
        data["color"] = color

    # add size attribute based on weighted node degree
    size_map = dict(g.degree(weight="weight"))
    nx.set_node_attributes(g, size_map, name="Size")

    return g
Example #20
def mc_iter(n, m, p, q, tilde, i=1):
    X_graph = er_np(n, p*p)
    ase = AdjacencySpectralEmbed(n_components=1)
    X = ase.fit_transform(X_graph)

    Y_graph = er_np(m, q*q)
    ase = AdjacencySpectralEmbed(n_components=1)
    Y = ase.fit_transform(Y_graph)

    if tilde:
        X_new, Y_new = sample_noisy_points(X, Y)
    else:
        X_new, Y_new = X, Y

    ldt = LatentDistributionTest()
    pval = ldt.fit(X_new, Y_new, pass_graph=False)
    return pval
Example #21
# close to what we set originally if we undo the rescaling step.
# %% double checking on model params
sbme = SBMEstimator(directed=False, loops=False)
sbme.fit(adj, y=labels)
block_p_hat = sbme.block_p_
block_heatmap(block_p_hat, title=r"Observed $\hat{B}$")
block_p_hat_unscaled = block_p_hat / scaling_factor
block_heatmap(block_p_hat_unscaled, title=r"Observed $\hat{B}$ (unscaled)")

# %% [markdown]
# ## Spectral embedding
# Here I use graspy to do ASE, LSE, and regularized LSE. Note that we're just using the
# SVDs here. There is an option to throw out the first eigenvector.
#%% embeddings
embed_kws = dict(n_components=k + 1, algorithm="full", check_lcc=False)
ase = AdjacencySpectralEmbed(**embed_kws)
lse = LaplacianSpectralEmbed(form="DAD", **embed_kws)
rlse = LaplacianSpectralEmbed(form="R-DAD", **embed_kws)

ase_embed = ase.fit_transform(adj)
lse_embed = lse.fit_transform(adj)
rlse_embed = rlse.fit_transform(adj)
embeddings_list = [ase_embed, lse_embed, rlse_embed]

remove_first = True
for i, embedding in enumerate(embeddings_list):
    if remove_first:
        embeddings_list[i] = embedding[:, 1:]
    else:
        embeddings_list[i] = embedding[:, :k]
Example #22
stashfig("random-sf")

adj_df = pd.DataFrame()
adj_df["Signal flow"] = true_z
adj_df["Input"] = "Adjacency"
adj_df["Block"] = labels

# fg = sns.FacetGrid(dist_df, col="Label", col_wrap=2, aspect=2, hue="Label")
# fg.map(sns.distplot, "Signal flow")
# stashfig("sf-dists")

# %% [markdown]
# # try with p_hat
from graspy.embed import AdjacencySpectralEmbed

latent = AdjacencySpectralEmbed(n_components=n_blocks).fit_transform(A)
P_hat = latent[0] @ latent[1].T
# P_hat -= P_hat.min()
heatmap(P_hat, title=r"$\hat{P}$")
stashfig("p-hat")
true_z = signal_flow(P_hat)
sort_inds = np.argsort(true_z)[::-1]
heatmap(
    A[np.ix_(sort_inds, sort_inds)],
    cbar=False,
    title=r"Feedforward SBM, sorted by $\hat{P}$ signal flow",
)
stashfig("ffSBM-phat-sf")

A_fake = A.copy().ravel()
np.random.shuffle(A_fake)
Example #23
def simulation(n,
               pi,
               normal_params,
               beta_params,
               cond_ind=True,
               errors=None,
               smooth=False,
               acorn=None):
    #- Type checks
    if isinstance(normal_params, list):
        sbm_check = False
        # there are other checks to do..
    elif isinstance(normal_params, np.ndarray):
        if normal_params.ndim == 2:
            if np.array_equal(normal_params, normal_params.T):
                sbm_check = True
            else:
                msg = 'if normal_params is a 2 dimensional array it must be symmetric'
                raise ValueError(msg)
        else:
            msg = 'if normal_params is an array, it must be a 2 dimensional array'
            raise TypeError(msg)
    else:
        msg = 'normal_params must be either a list or a 2 dimensional array'
        raise TypeError(msg)

    if acorn is None:
        acorn = np.random.randint(10**6)
    np.random.seed(acorn)

    #- Multinomial trial
    counts = np.random.multinomial(n, [pi, 1 - pi])

    #- Hard code the number of blocks
    K = 2

    #- Set labels
    labels = np.concatenate((np.zeros(counts[0]), np.ones(counts[1])))

    #- number of seeds = n_{i}/10
    n_seeds = np.round(0.1 * counts).astype(int)

    #- Set training and test data
    class_train_idx = [
        range(np.sum(counts[:k]),
              np.sum(counts[:k]) + n_seeds[k]) for k in range(K)
    ]
    train_idx = np.concatenate((class_train_idx)).astype(int)

    test_idx = [k for k in range(n) if k not in train_idx]

    #- Total number of seeds
    m = np.sum(n_seeds)

    #- estimate class probabilities
    pi_hats = n_seeds / m

    #- Sample from beta distributions
    beta_samples = beta_sampler(counts, beta_params)
    Z = beta_samples

    #- Sample from multivariate normal or SBM either independently of Zs or otherwise
    if cond_ind:
        if sbm_check:
            A = sbm(counts, normal_params)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = MVN_sampler(counts, normal_params)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]
    else:
        if sbm_check:
            P = blowup(
                normal_params, counts
            )  # A big version of B to be able to change connectivity probabilities of individual nodes
            scales = np.prod(Z, axis=1)**(
                1 / Z.shape[1]
            )  # would do just the outer product, but if the Z's are too small we risk not being connected
            new_P = P * np.outer(scales, scales)  # new probability matrix
            A = sbm(np.ones(n).astype(int), new_P)
            ase_obj = ASE(n_elbows=1)
            X = ase_obj.fit_transform(A)
        else:
            X = conditional_MVN_sampler(Z=Z,
                                        rho=1,
                                        counts=counts,
                                        params=normal_params,
                                        seed=None)
            if len(normal_params[0][0]) == 1:
                X = X[:, np.newaxis]

    XZ = np.concatenate((X, Z), axis=1)

    #- Estimate normal parameters using seeds
    params = []
    for i in range(K):
        temp_mu, temp_cov = estimate_normal_parameters(X[class_train_idx[i]])
        params.append([temp_mu, temp_cov])

    #- Using conditional independence assumption (RF, KNN used for posterior estimates)
    if errors is None:
        errors = [[] for i in range(6)]

    rf1 = RF(n_estimators=100,
             max_depth=int(np.round(np.log(Z[train_idx].shape[0]))))
    rf1.fit(Z[train_idx], labels[train_idx])

    knn1 = KNN(n_neighbors=int(np.round(np.log(Z[train_idx].shape[0]))))
    knn1.fit(Z[train_idx], labels[train_idx])

    if smooth:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1, m=m)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)
    else:
        temp_pred = classify(X[test_idx], Z[test_idx], params, rf1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[0].append(temp_error)

        knn1 = KNN(n_neighbors=int(np.round(np.log(m))))
        knn1.fit(Z[train_idx], labels[train_idx])

        temp_pred = classify(X[test_idx], Z[test_idx], params, knn1)
        temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
        errors[1].append(temp_error)

    temp_pred = QDA(X[test_idx], pi_hats, params)
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[2].append(temp_error)

    #- Not using conditional independence assumption (RF, KNN used for classification)
    XZseeds = np.concatenate((X[train_idx], Z[train_idx]), axis=1)

    rf2 = RF(n_estimators=100, max_depth=int(np.round(np.log(m))))
    rf2.fit(XZ[train_idx], labels[train_idx])
    temp_pred = rf2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[3].append(temp_error)

    knn2 = KNN(n_neighbors=int(np.round(np.log(m))))
    knn2.fit(XZ[train_idx], labels[train_idx])

    temp_pred = knn2.predict(XZ[test_idx])
    temp_error = 1 - np.sum(temp_pred == labels[test_idx]) / len(test_idx)
    errors[4].append(temp_error)

    temp_accuracy = GCN(adj, features, train_idx, labels)
    temp_error = 1 - temp_accuracy
    errors[5].append(temp_error)

    return errors
Example #24
def _ase_embed(mat, atlas, graph_path, ID, subgraph_name="whole_brain"):
    """

    Class for computing the adjacency spectral embedding of a graph.

    The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation
    of the graph based on its adjacency matrix. It relies on an SVD to reduce
    the dimensionality to the specified k, or if k is unspecified, can find a number of
    dimensions automatically

    Parameters
    ----------
    mat : ndarray
        Adjacency matrix of the graph to embed, of shape (n_vertices, n_vertices).
    atlas : str
    graph_path : str
    ID : str
    subgraph_name : str

    Returns
    -------
    out_path : str
        File path to .npy file containing ASE embedding tensor.

    Notes
    -----
    The singular value decomposition:

    .. math:: A = U \Sigma V^T

    is used to find an orthonormal basis for a matrix, which in our case is the
    adjacency matrix of the graph. These basis vectors (in the matrices U or V) are
    ordered according to the amount of variance they explain in the original matrix.
    By selecting a subset of these basis vectors (through our choice of dimensionality
    reduction) we can find a lower dimensional space in which to represent the graph.

    References
    ----------
    .. [1] Sussman, D.L., Tang, M., Fishkind, D.E., Priebe, C.E.  "A
      Consistent Adjacency Spectral Embedding for Stochastic Blockmodel Graphs,"
      Journal of the American Statistical Association, Vol. 107(499), 2012

    """
    import os
    from pathlib import Path

    import numpy as np
    from pynets.core.utils import flatten
    from graspy.embed import AdjacencySpectralEmbed
    from joblib import dump
    from graspy.utils import get_lcc

    # Adjacency Spectral embedding
    print(f"Embedding unimod asetome for atlas: {atlas} and {subgraph_name}...")
    ase = AdjacencySpectralEmbed()
    ase_fit = ase.fit_transform(get_lcc(mat))

    dir_path = str(Path(os.path.dirname(graph_path)).parent)

    namer_dir = f"{dir_path}/embeddings"
    if not os.path.isdir(namer_dir):
        os.makedirs(namer_dir, exist_ok=True)

    out_path = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome.npy"
    out_path_est = f"{namer_dir}/{list(flatten(ID))[0]}_{atlas}_{subgraph_name}_asetome_estimator.joblib"

    dump(ase, out_path_est)

    print("Saving...")
    np.save(out_path, ase_fit)
    del ase, ase_fit

    return out_path
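Note: the embedding is computed on get_lcc(mat), the largest connected component, so the saved tensor can have fewer rows than mat. A sketch of recovering which nodes survive, assuming graspy's get_lcc signature:

import numpy as np
from graspy.utils import get_lcc

mat = np.zeros((5, 5))
mat[0, 1] = mat[1, 0] = 1
mat[1, 2] = mat[2, 1] = 1  # nodes 3 and 4 are isolated
lcc, kept_inds = get_lcc(mat, return_inds=True)
print(lcc.shape, kept_inds)  # (3, 3) [0 1 2]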
Example #25
from graspy.simulations import er_np

# Experiment parameters

n_verts = 200
p = 0.5
n_components = 1
n_sims = 1000

# Run experiment

estimated_latents = np.zeros((n_sims, 2))
for i in range(n_sims):
    graph = er_np(n_verts, p, directed=False, loops=False)

    ase_diag = AdjacencySpectralEmbed(n_components=n_components, diag_aug=True)

    ase = AdjacencySpectralEmbed(n_components=n_components, diag_aug=False)

    diag_latent = ase_diag.fit_transform(graph)
    ase_latent = ase.fit_transform(graph)

    mean_diag_latent = np.mean(diag_latent)
    mean_latent = np.mean(ase_latent)
    estimated_latents[i, 0] = mean_diag_latent
    estimated_latents[i, 1] = mean_latent

diffs = estimated_latents - np.sqrt(p)  # the true latent position is sqrt(p)
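Why sqrt(p) is the ground truth here: an ER(n, p) graph is a rank-one RDPG with a constant probability matrix, so a one-dimensional latent position x must satisfy

    P = x x^{\top} = p \mathbf{1}\mathbf{1}^{\top} \quad\Rightarrow\quad x = \sqrt{p}

diffs therefore measures each estimator's bias around that value, and the experiment probes whether diagonal augmentation shrinks it.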

# Plot results
Example #26
#%%
%matplotlib inline
from graspy.plot import *
from graspy.simulations import sbm
from graspy.embed import AdjacencySpectralEmbed
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
g = sbm([100, 100], [[0.8, 0.2], [0.2, 0.8]])
ase = AdjacencySpectralEmbed()
X = ase.fit_transform(g)
labels = 25 * [0] + 25 * [1] + 25 * [2] + 24 * [-1] + [-2]
# pairplot(X, size=50, alpha=0.6)

plt.show()


Example #27
meta = mg.meta

adj = mg.adj
adj = pass_to_ranks(adj)
meta["inds"] = range(len(meta))

left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]
lp_inds, rp_inds = get_paired_inds(meta)

# %% [markdown]
# ## Embed
# Here the embedding is ASE with PTR and diagonal augmentation; the number of embedding
# dimensions is for now set to ZG2 (4 + 4). Using the known pairs as "seeds", the left
# embedding is matched to the right using Procrustes.
ase = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = ase.fit_transform(adj)
n_components = embed[0].shape[1]  # use all of ZG2
X = np.concatenate((embed[0][:, :n_components], embed[1][:, :n_components]),
                   axis=-1)
R, _ = orthogonal_procrustes(X[lp_inds], X[rp_inds])

if CLUSTER_SPLIT == "best":
    X[left_inds] = X[left_inds] @ R

# %% [markdown]
# ## Clustering
# Clustering is performed using Gaussian mixture modeling. At each candidate value of k,
# 50 models are trained on the left embedding and 50 on the right embedding
# (choosing the best covariance structure based on BIC on the train set).
results = crossval_cluster(
Example #28
# )

#%%

from graspy.embed import AdjacencySpectralEmbed, OmnibusEmbed
from graspy.utils import pass_to_ranks
from graspy.plot import pairplot


sum_adj = np.sum(np.array(mb_color_graphs), axis=0)

n_components = 4

#
ptr_adj = pass_to_ranks(sum_adj)
ase = AdjacencySpectralEmbed(n_components=n_components)
sum_latent = ase.fit_transform(ptr_adj)
sum_latent = np.concatenate(sum_latent, axis=-1)
pairplot(sum_latent, labels=mb_class_labels)

ptr_color_adjs = [pass_to_ranks(a) for a in mb_color_graphs]
# graph_sum = [np.sum(a) for a in mb_color_graphs]
# ptr_color_adjs = [ptr_color_adjs[i] + (1 / graph_sum[i]) for i in range(4)]
omni = OmnibusEmbed(n_components=n_components // 4)
color_latent = omni.fit_transform(ptr_color_adjs)
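# Assuming the color graphs are directed (as the tuple handling suggests),
# omni.fit_transform returns an (out, in) pair of (n_graphs, n_verts, d) arrays;
# the first concatenate joins them into a single (n_graphs, n_verts, 2d) array,
# and the second stacks the graphs side by side into one
# (n_verts, n_graphs * 2d) feature matrix.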
color_latent = np.concatenate(color_latent, axis=-1)
color_latent = np.concatenate(color_latent, axis=-1)
pairplot(color_latent, labels=mb_class_labels)

from graspy.embed import MultipleASE
Example #29
ax.set_yticklabels([name0, name1, name2, name3])
# %% [markdown]
# ##
matrixplot(
    path_indicator_mat[:50, :50],
    plot_type="scattermap",
    sizes=(0.2, 0.2),
    hue="weight",
    palette=sns.color_palette("husl", n_colors=10),
    ax=ax,
)

# %% [markdown]
# ##

embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
embed = embedder.fit_transform(adj)
embed = np.concatenate(embed, axis=-1)
pairplot(embed, labels=labels, palette="tab20")

# %% [markdown]
# ## Run paths
print(f"Running {n_init} random walks from each source node...")

transition_probs = to_markov_matrix(adj)

out_inds = np.where(labels == n_blocks - 1)[0]
source_inds = np.where(labels == 0)[0]


def rw_from_node(s):
Example #30
left_inds = meta[meta["left"]]["inds"]
right_inds = meta[meta["right"]]["inds"]


def remove_axis(ax):
    remove_spines(ax)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks([])
    ax.set_yticks([])


method = "ortho"

print("Embedding graph...")
embedder = AdjacencySpectralEmbed(n_components=None, n_elbows=2)
in_embed, out_embed = embedder.fit_transform(pass_to_ranks(adj))
procrust = Procrustes(method=method)
# procrust.fit(in_embed, x_seeds=lp_inds, y_seeds=rp_inds)
embed = np.concatenate((in_embed, out_embed), axis=-1)

dim1 = 0
dim2 = 4

fig, axs = plt.subplots(2, 2, figsize=(20, 20))
plot_df = pd.DataFrame(data=embed[:, [0, 1]])
plot_df["merge_class"] = meta["merge_class"].values
ax = axs[0, 0]
sns.scatterplot(
    data=plot_df,
    x=0,