Example #1
    def test_SBM_epsilon(self):
        np.random.seed(12345678)
        B1 = np.array([[0.5, 0.2], [0.2, 0.5]])
        B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
        b_size = 200
        A1 = sbm(2 * [b_size], B1)
        A2 = sbm(2 * [b_size], B1)
        A3 = sbm(2 * [b_size], B2)

        # non-parallel test
        lpt_null = latent_position_test(A1,
                                        A2,
                                        n_components=2,
                                        n_bootstraps=100)
        lpt_alt = latent_position_test(A1,
                                       A3,
                                       n_components=2,
                                       n_bootstraps=100)
        self.assertTrue(lpt_null[0] > 0.05)
        self.assertTrue(lpt_alt[0] <= 0.05)

        # parallel test
        lpt_null = latent_position_test(A1,
                                        A2,
                                        n_components=2,
                                        n_bootstraps=100,
                                        workers=-1)
        lpt_alt = latent_position_test(A1,
                                       A3,
                                       n_components=2,
                                       n_bootstraps=100,
                                       workers=-1)
        self.assertTrue(lpt_null[0] > 0.05)
        self.assertTrue(lpt_alt[0] <= 0.05)

    def test_SBM_dcorr(self):
        np.random.seed(12345678)
        B1 = np.array([[0.5, 0.2], [0.2, 0.5]])

        B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
        b_size = 200
        A1 = sbm(2 * [b_size], B1)
        A2 = sbm(2 * [b_size], B1)
        A3 = sbm(2 * [b_size], B2)
        ldt_null = latent_distribution_test(A1, A2)
        ldt_alt = latent_distribution_test(A1, A3)
        self.assertTrue(ldt_null[0] > 0.05)
        self.assertTrue(ldt_alt[0] <= 0.05)
Example #3
def generate_data():
    np.random.seed(1)

    p1 = [[0.2, 0.1], [0.1, 0.2]]
    p2 = [[0.1, 0.2], [0.2, 0.1]]
    n = [50, 50]

    g1 = [sbm(n, p1) for _ in range(20)]
    g2 = [sbm(n, p2) for _ in range(20)]
    g = g1 + g2

    y = ["0"] * 20 + ["1"] * 20

    return g, y

    def test_SBM_epsilon(self):
        np.random.seed(12345678)
        B1 = np.array([[0.5, 0.2], [0.2, 0.5]])

        B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
        b_size = 200
        A1 = sbm(2 * [b_size], B1)
        A2 = sbm(2 * [b_size], B1)
        A3 = sbm(2 * [b_size], B2)

        spt_null = LatentPositionTest(n_components=2, n_bootstraps=100)
        spt_alt = LatentPositionTest(n_components=2, n_bootstraps=100)
        p_null = spt_null.fit_predict(A1, A2)
        p_alt = spt_alt.fit_predict(A1, A3)
        self.assertTrue(p_null > 0.05)
        self.assertTrue(p_alt <= 0.05)
Example #5
def gen_sbm(p=.3, q=.15, N=1500):
    """
    Generate an SBM adjacency matrix and its community labels.
    """
    n = N // 3
    B = np.full((3, 3), q)
    B[np.diag_indices_from(B)] = p
    A, labels = sbm([n, n, n], B, return_labels=True)

    return A, labels
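A quick usage sketch (our addition, assuming numpy and graspologic's sbm are imported as in the snippet): the helper returns the sampled adjacency matrix together with the planted community labels.

A, labels = gen_sbm(p=.3, q=.15, N=1500)
assert A.shape == (1500, 1500)                       # one row/column per node
assert np.array_equal(np.unique(labels), [0, 1, 2])  # three planted blocks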
Example #6
def gen_sbm(p, q, assortative=True, N=1500):
    if not assortative:
        p, q = q, p

    n = N // 3
    B = np.full((3, 3), q)
    B[np.diag_indices_from(B)] = p
    A, labels = sbm([n, n, n], B, return_labels=True)

    return A, labels
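The assortative flag simply swaps the within- and between-block probabilities; a quick sketch of both settings (our addition):

A_assort, labels = gen_sbm(0.3, 0.15, assortative=True)   # dense diagonal blocks
A_flipped, _ = gen_sbm(0.3, 0.15, assortative=False)      # dense off-diagonal blocks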
Example #7
    def setUp(self) -> None:
        estimator = SBMEstimator(directed=True, loops=False)
        B = np.array([[0.9, 0.1], [0.1, 0.9]])
        g = sbm([50, 50], B, directed=True)
        labels = _n_to_labels([50, 50])
        p_mat = _block_to_full(B, labels, (100, 100))
        p_mat -= np.diag(np.diag(p_mat))
        self.estimator = estimator
        self.p_mat = p_mat
        self.graph = g
        self.labels = labels
Example #8
    def setup_class(cls):
        estimator = SBMEstimator(directed=True, loops=False)
        B = np.array([[0.9, 0.1], [0.1, 0.9]])
        g = sbm([50, 50], B, directed=True)
        labels = _n_to_labels([50, 50])
        p_mat = _block_to_full(B, labels, (100, 100))
        p_mat -= np.diag(np.diag(p_mat))
        cls.estimator = estimator
        cls.p_mat = p_mat
        cls.graph = g
        cls.labels = labels
Example #9
    def test_SBM_dcorr(self):
        for test in self.tests.keys():
            np.random.seed(12345678)
            B1 = np.array([[0.5, 0.2], [0.2, 0.5]])

            B2 = np.array([[0.7, 0.2], [0.2, 0.7]])
            b_size = 200
            A1 = sbm(2 * [b_size], B1)
            A2 = sbm(2 * [b_size], B1)
            A3 = sbm(2 * [b_size], B2)

            ldt_null = LatentDistributionTest(test,
                                              self.tests[test],
                                              n_components=2,
                                              n_bootstraps=100)
            ldt_alt = LatentDistributionTest(test,
                                             self.tests[test],
                                             n_components=2,
                                             n_bootstraps=100)
            p_null = ldt_null.fit_predict(A1, A2)
            p_alt = ldt_alt.fit_predict(A1, A3)
            self.assertTrue(p_null > 0.05)
            self.assertTrue(p_alt <= 0.05)
Example #10
@pytest.fixture(scope="module")
def M(request):
    # module scope ensures that A and labels will always match,
    # since they are consumed in separate functions

    # parameters
    n = 10
    p, q = 0.9, 0.3

    # block probability matrix
    P = np.full((2, 2), q)
    P[np.diag_indices_from(P)] = p

    # generate sbm
    return sbm([n] * 2, P, directed=False, return_labels=True)
Example #11
def test_no_nans(assortative):
    # this generated a matrix with nan values before

    Y = gen_covariates_beta()  # project-local helper that draws node covariates
    N = 1500  # Total number of nodes
    n = N // 3
    p, q = 0.15, 0.3
    B = np.array([[p, p, q], [p, p, q], [q, q, p]])
    A = sbm([n, n, n], B)

    # embed and check that the latent positions are finite
    case = CASE(assortative=assortative, n_components=2)
    latents = case.fit_transform(A, Y)
    assert np.isfinite(latents).all()
Example #12
    def test_SBM_fit_supervised(self):
        np.random.seed(8888)
        B = np.array([
            [0.9, 0.2, 0.05, 0.1],
            [0.1, 0.7, 0.1, 0.1],
            [0.2, 0.4, 0.8, 0.5],
            [0.1, 0.2, 0.1, 0.7],
        ])
        n = np.array([500, 500, 250, 250])
        g = sbm(n, B, directed=True, loops=False)
        sbe = SBMEstimator(directed=True, loops=False)
        labels = _n_to_labels(n)
        sbe.fit(g, y=labels)
        B_hat = sbe.block_p_
        assert_allclose(B_hat, B, atol=0.01)
Example #13
def dcsbm_corr(n,
               p,
               r,
               theta,
               epsilon1=1e-3,
               epsilon2=1e-3,
               directed=False,
               loops=False):
    '''
    Sample a pair of correlated DC-SBM graphs with the same marginal probabilities.
    '''
    Z = np.repeat(np.arange(0, np.array(n).size), n)
    R = r * np.ones((np.sum(n), np.sum(n)))
    # sample a DC-SBM with block probabilities p and degree corrections theta
    G = sbm(n, p, dc=theta)
    # fit a DC-SBM to G to estimate its edge probability matrix P
    G_dcsbm = DCSBMEstimator(directed=False).fit(G, y=Z)
    p_mat = G_dcsbm.p_mat_
    # clip P so every entry is a valid probability
    p_mat[p_mat < epsilon1] = epsilon1
    p_mat[p_mat > 1 - epsilon2] = 1 - epsilon2
    # sample correlated graphs based on P
    G1, G2 = sample_edges_corr(p_mat, R, directed, loops)
    return G1, G2
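A minimal usage sketch (our addition, with hypothetical parameter values): two blocks of 100 nodes, within-block probability 0.5, between-block probability 0.2, edge correlation 0.3, and uniform degree corrections, which graspologic's sbm requires to sum to 1 within each block.

n = [100, 100]
p = np.array([[0.5, 0.2], [0.2, 0.5]])
theta = np.ones(200)
theta[:100] /= theta[:100].sum()  # degree corrections sum to 1 per block
theta[100:] /= theta[100:].sum()
G1, G2 = dcsbm_corr(n, p, r=0.3, theta=theta)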
Example #14

from graspologic.simulations import sbm
from graspologic.utils import remap_labels
from graspologic.plot import pairplot
from graspologic.embed import CovariateAssistedEmbedding
import seaborn as sns

n = 500
assortative = True
p, q = 0.03, 0.015
if not assortative:
    p, q = q, p
A, labels = sbm(
    [n, n, n],
    p=[[p, q, q], [q, p, q], [q, q, p]],
    return_labels=True,
)
#%%
# gen_covariates is a project-local helper that draws node covariates whose
# means depend on the block labels
# X = gen_covariates(labels, m1=0.8, m2=0.2, agreement=0.0)
X = gen_covariates(labels, m1=0.8, m2=0.2, agreement=1)
case = CovariateAssistedEmbedding(n_components=3, embedding_alg="assortative")
case.fit(A, covariates=X)

#%%
Xhat = case.latent_left_
pairplot(Xhat, labels=labels)


Example #15
            nca = nearest_common_ancestor(source_node, target_node).name
            base_prob = probs[nca]
            new_prob = np.random.uniform(base_prob - alpha * base_prob,
                                         base_prob + alpha * base_prob)
            i = source_node.name
            j = target_node.name
            sbm_probs.loc[i, j] = new_prob

from graspologic.utils import symmetrize

sbm_probs = sbm_probs.values
sbm_probs = symmetrize(sbm_probs)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(sbm_probs, ax=ax)

# %%
flat_labels = []
node_data = mt.node_data
for node, row in node_data.iterrows():
    path = row.values[:4]
    path = path[~np.isnan(path)]
    label = path[-1]
    flat_labels.append(label)

flat_labels = np.array(flat_labels)

#%%
A, flat_labels = sbm(n_per_leaf, sbm_probs, directed=False, return_labels=True)

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
adjplot(A, ax=ax)
Example #16
#%%
import numpy as np
from graspologic.simulations import sbm

p1 = 0.7
p2 = 0.5
p3 = 0.1
p4 = 0.3
B1 = np.array([[p1, p3], [p3, p1]])  # affinity
B2 = np.array([[p1, p3], [p3, p2]])  # core-periphery
B3 = np.array([[p1, p2], [p2, p1]])
B4 = np.array([[p1, p4], [p4, p3]])

n = [50, 50]
A1, labels = sbm(n, B1, return_labels=True)
A2 = sbm(n, B2)
A3 = sbm(n, B3)
A4 = sbm(n, B4)

from graspologic.embed import AdjacencySpectralEmbed
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ortho_group
from graspologic.embed import selectSVD

sns.set_context("talk")

Vs = []
for i in range(10):
    As = [A1, A2, A3, A4]
    Xs = []


# sample_func assumes `ns` and `B` are defined elsewhere in the surrounding script
def sample_func():
    return sbm(ns, B, directed=True, loops=False)
Example #18
from graspologic.simulations import sbm
from graspologic.plot import heatmap

# for simplicity, the simulation code generates samples wherein
# vertices from the same community are ordered contiguously in the
# vertex set. Note that it would be theoretically equivalent to
# denote the total number of vertices in each community, or to provide
# a vector tau with the first 50 entries taking the value 1, and the
# second 50 entries taking the value 0, given this fact.
ns = [50, 50]
n = sum(ns)  # total number of vertices is the sum of the community sizes
B = [[.5, .2],
     [.2, .05]]

A = sbm(n=ns, p=B)
_ = heatmap(A, title="SBM(T, B) Simulation")

In the above simulation, we can see an apparent $4$-"block structure": the probability of an edge existing depends upon which of the $4$ "blocks" the edge falls into. These blocks are the apparent "subgraphs", or square patterns, observed in the above graph. The block structure is clearly delineated by the first $50$ vertices belonging to one community and the second $50$ vertices belonging to the other.

It is important to note that a graph may be $SBM_n(\vec \tau, \pmb B)$ regardless of whether a block structure is visually discernible. Indeed, the block structure may only be apparent given a particular ordering of the vertices, and otherwise may not be discernible at all. Consider, for instance, an adjacency matrix with the exact same realization as the graph plotted above, up to a permutation (reordering) of the vertices. The graph below shows the exact same set of adjacencies as above, but where $\pmb A$ has had its vertices resorted randomly. It has an identical block structure (up to the reordering of the vertices) as the preceding graph.

import numpy as np

# generate a permutation of the n vertices
vtx_perm = np.random.choice(n, size=n, replace=False)

# same adjacency matrix (up to a reordering of the vertices)
heatmap(A[vtx_perm][:, vtx_perm])
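As a quick check (our addition), undoing the permutation restores the visible block structure, since re-sorting the shuffled vertices is exactly the inverse permutation:

A_perm = A[vtx_perm][:, vtx_perm]
inv_perm = np.argsort(vtx_perm)  # inverse permutation
assert (A_perm[inv_perm][:, inv_perm] == A).all()
heatmap(A_perm[inv_perm][:, inv_perm], title="Permuted SBM, vertices re-sorted")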
Example #19
        mpl.rcParams[key] = val
    context = sns.plotting_context(context=context, font_scale=font_scale, rc=rc_dict)
    sns.set_context(context)


set_theme()

#%%
from graspologic.simulations import sbm, sbm_corr

np.random.seed(8888)
p = np.array([[0.7, 0.1], [0.1, 0.7]])
n = [100, 100]
r = 0.9
A1, A2 = sbm_corr(n, p, r, directed=False, loops=False)
_, labels = sbm(n, p, return_labels=True)
from graspologic.plot import heatmap
import networkx as nx

g1 = nx.from_numpy_array(A1)
# heatmap(A1, cbar=False)
# draw the graph with a spring layout, colored by block
plt.figure()
nodelist = list(sorted(g1.nodes()))
colors = sns.color_palette("deep")
palette = dict(zip([0, 1], colors))
node_colors = list(map(palette.get, labels))
nx.draw_spring(g1, nodelist=nodelist, node_color=node_colors)

#%%
from giskard.plot import graphplot
# ### A feedforward SBM model
# Here we construct a 2-block SBM where the block probabilities are feedforward with an
# amount that depends on $\delta$.

#%%


def construct_feedforward_B(p=0.5, delta=0):
    B = np.array([[p, p + delta], [p - delta, p]])
    return B


p = 0.5
delta = 0.1
B = construct_feedforward_B(p, delta)
ns = [15, 15]
A, labels = sbm(ns, B, directed=True, loops=False, return_labels=True)
fig, axs = plt.subplots(1, 3, figsize=(12, 4))

title = "Block probabilities\n"
title += r"$p = $" + f"{p}, " + r"$\delta = $" + f"{delta}"
annot = np.array([[r"$p$", r"$p + \delta$"], [r"$p - \delta$", r"$p$"]])
sns.heatmap(
    B,
    vmin=0,
    center=0,
    vmax=1,
    cmap="RdBu_r",
    annot=annot,
    cbar=False,
    square=True,
    fmt="",
    ax=axs[0],  # the block matrix goes in the first of the three panels
)
axs[0].set_title(title)
Example #21
File: utils.py Project: EliHei2/scPotter
def gen_syn_data(
    n_classes=3,
    n_obs_train=200,
    n_obs_test=100,
    n_features=10,
    n_edges=3,
    n_char_features=10,
    signal=[0, 0],
    diff_coef=[0.1, 0.1],
    noise=[0.2, 0.2],
    n_communities=5,
    probs=[0.5, 0.1],
    n_iter=3,
    model='BA',
    syn_method="sign",
    random_seed=1996):
    """
        Generates synthetic training and test datasets based on an underlying random graph model.
        Each class is defined by a set of characteristic features. 
        Each feature starts with random values. For each observation, the characteristic features of its class are increased by "signal".
        Then, values on each node are altered based on the synthetic method used.

        Parameters:
        ----------
        n_classes: int
            Number of classes
        n_obs_train: int 
            Number of observations per class for the training dataset
        n_obs_test: int 
            Number of observations per class for the test dataset
        n_features: int
            Number of features, each corresponding to a node in the graph
        n_edges: int
            Number of edges attached per new node (Barabási–Albert); for Erdős–Rényi, the total edge count is `n_edges * n_features`
        n_char_features: int
            Number of features that are specific to each class
        signal: [float, float]
            The level of initial signal for the characteristic features, for training and test dataset respectively. 
            Only used when `syn_method == 'diffusion'` or `syn_method == 'activation'`.
        diff_coef: [float, float]
            How strongly each node transmits its value over its edges, for the training and test dataset respectively.
            Only used when `syn_method == 'diffusion'`.
        noise: [float, float]
            (Gaussian) Noise level added at the end of the information passing, for training and test dataset respectively. 
        n_communities: int
            Number of graph communities for the Stochastic Block Model. Used only when `model == 'SBM'`.
        probs: [float, float]
            Probability of intra and inter cluster edges for the Stochastic Block Model. Used only when `model == 'SBM'`.
        n_iter: int
            Number of message passing iterations. Used only when `syn_method == 'diffusion'`.
        model: str
            The random graph generation model. Can be `'BA'` for Barabási–Albert, `'ER'` for Erdős–Rényi, `'SBM'` for Stochastic Block Model, or `'linear'` for a simple chain graph.
        syn_method: str
            The message passing synthetic process. Can be:
                `'diffusion'` for diffusing information over edges based on the difference on the end nodes.
                `'activation'` for activating a characteristic node based on its neighbors.
                `'sign'` for changing the sign of a characteristic node based on the average sign of its neighbors.
        random_seed: int
            Seed for numpy's random number generator.

        Returns
        -------
        X_train : a numpy ndarray with features generated for the training dataset.
        y_train : a numpy ndarray with labels generated for the training dataset.
        adj_train : the adjacency matrix of the graph generated for the training dataset.
        X_test : a numpy ndarray with features generated for the test dataset.
        y_test : a numpy ndarray with labels generated for the test dataset.
        adj_test : the adjacency matrix of the graph generated for the test dataset.
        char_feat : a dict mapping each class to the indices of its characteristic features.
    """
    np.random.seed(random_seed)
    if model=='ER':
        # Generate a random graph with the Erdos-Renyi model.
        graph_train = graph_test = ig.Graph.Erdos_Renyi(n=n_features, m=n_edges*n_features, directed=False)
        adj_train = adj_test = np.array(graph_train.get_adjacency().data)
    elif model=="BA":
        # Generate a scale-free graph with the Barabasi-Albert model.
        graph_train  = graph_test = ig.Graph.Barabasi(n_features, n_edges, directed=False)
        adj_train = adj_test = np.array(graph_train.get_adjacency().data)
    elif model=='SBM':
        # Generate a random graph with the stochastic block model:
        # probs[0] within communities (diagonal), probs[1] between (off-diagonal).
        n = [n_features // n_communities] * n_communities
        p = np.full((n_communities, n_communities), probs[1])
        p[np.diag_indices_from(p)] = probs[0]
        adj_train = sbm(n=n, p=p)
        adj_test  = sbm(n=n, p=p)
        graph_train = ig.Graph.Adjacency(adj_train.tolist())
        graph_test  = ig.Graph.Adjacency(adj_test.tolist())
    elif model=='linear':
        g = ig.Graph()
        edges = [(i, i+1) for i in range(n_features-1)]
        g.add_vertices(n_features)
        g.add_edges(edges)
        graph_train = graph_test = g
        adj_train = np.array(g.get_adjacency().data)
        adj_test  = np.array(g.get_adjacency().data)
    else:
        raise ValueError("Unrecognized random graph generation model. Please use ER, BA, linear, or SBM.")
    X_train = []
    y_train = []
    X_test  = []
    y_test  = []
    char_feat = dict()
    if syn_method=="sign":
        for c in range(n_classes):
            # Draw the features which define this class
            char_features = np.random.choice(n_features,size=n_char_features,replace=False)
            char_feat[c] = char_features
            for i in range(n_obs_train):
                # Start from a random vector
                features = np.random.normal(0, 1, n_features)
                features_next = np.copy(features)
                for f in char_features:
                    s=0
                    for neighbor in graph_train.neighbors(f):
                        s+=features[neighbor]
                    # Set the sign to the average sign of the neighbours
                    features_next[f] = np.sign(s) * np.abs(features[f])
                features = features_next
                # Add additional noise
                if noise[0] > 0:
                    features += np.random.normal(0, noise[0], n_features)
                X_train.append(features)
                y_train.append(c)

            for i in range(n_obs_test):
                # Start from a random vector
                features = np.random.normal(0, 1, n_features)
                features_next = np.copy(features)
                for f in char_features:
                    s=0
                    for neighbor in graph_train.neighbors(f):
                        s+=features[neighbor]
                    # Set the sign to the average sign of the neighbours
                    features_next[f] = np.sign(s) * np.abs(features[f])
                features = features_next
                # Add additional noise
                if noise[1] > 0:
                    features += np.random.normal(0, noise[1], n_features)
                X_test.append(features)
                y_test.append(c)
    elif syn_method=="diffusion":
        for c in range(n_classes):
            signal[0] = np.random.normal(signal[0], 1, 1)
            signal[1] = np.random.normal(signal[1], 1, 1)
            # Draw the features which define this class
            char_features = np.random.choice(n_features,size=n_char_features,replace=False)
            char_feat[c] = char_features
            for i in range(n_obs_train):
                # Start from a random vector
                features = np.abs(np.random.normal(0, 1, n_features))
                # Increase the value for the characteristic features
                features[char_features] += np.abs(np.random.normal(signal[0], 1, n_char_features))
                features = features / np.linalg.norm(features)
                # Diffuse values through the graph
                for it in range(n_iter):
                    features_next = np.copy(features)
                    for e in graph_train.es:
                        features_next[e.target]+= (features[e.source] - features[e.target]) * diff_coef[0]
                        features_next[e.source]+= (features[e.target] - features[e.source]) * diff_coef[0]
                    features = features_next
                if noise[0] > 0:
                    features += np.random.normal(0, noise[0], n_features)
                X_train.append(features)
                y_train.append(c)

            for i in range(n_obs_test):
                # Start from a random vector
                features = np.abs(np.random.normal(0, 1, n_features))
                # Increase the value for the characteristic features
                features[char_features] += np.abs(np.random.normal(signal[1], 1, n_char_features))
                features = features / np.linalg.norm(features)
                # Diffuse values through the graph
                for it in range(n_iter):
                    features_next = np.copy(features)
                    for e in graph_test.es:
                        features_next[e.target]+= (features[e.source] - features[e.target]) * diff_coef[1]
                        features_next[e.source]+= (features[e.target] - features[e.source]) * diff_coef[1]
                    features = features_next
                if noise[1] > 0:
                    features += np.random.normal(0, noise[1], n_features)
                X_test.append(features)
                y_test.append(c)
    
    elif syn_method=="activation":
        for c in range(n_classes):
            # Draw the features which define this class
            char_features = np.random.choice(n_features,size=n_char_features,replace=False)
            char_feat[c] = char_features
            for i in range(n_obs_train):
                # Start from a random vector
                features = np.random.normal(0, 1, n_features)
                features_next = np.copy(features)
                for f in char_features:
                    s=0
                    degree=0
                    for neighbor in graph_train.neighbors(f):
                        s+=features[neighbor]
                        degree+=1
                    degree = max(degree,1)
                    features_next[f] = np.random.normal(s/degree * signal[0],0.2) 

                features = features_next
                if noise[0] > 0:
                    features += np.random.normal(0, noise[0], n_features)
                X_train.append(features)
                y_train.append(c)

            for i in range(n_obs_test):
                # Start from a random vector
                features = np.random.normal(0, 1, n_features)            
                features_next = np.copy(features)
                for f in char_features:
                    s=0
                    degree=0
                    for neighbor in graph_train.neighbors(f):
                        s+=features[neighbor]
                        degree+=1
                    degree = max(degree,1)
                    features_next[f] = np.random.normal(s/degree * signal[1],0.2) 

                features = features_next
                if noise[1] > 0:
                    features += np.random.normal(0, noise[1], n_features)
                X_test.append(features)
                y_test.append(c)
    else:
        raise ValueError("Unrecognized synthetic dataset generation method! Please use sign, diffusion, or activation.")
    # shuffle the observations
    train_idx = np.random.permutation(len(y_train))
    X_train   = np.array(X_train)[train_idx, :]
    y_train   = np.array(y_train)[train_idx]
    test_idx  = np.random.permutation(len(y_test))
    X_test    = np.array(X_test)[test_idx, :]
    y_test    = np.array(y_test)[test_idx]

    return np.absolute(X_train), y_train, adj_train, \
        np.absolute(X_test), y_test, adj_test, char_feat
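A minimal usage sketch (our addition, with hypothetical argument values), generating a sign-flipping dataset over an SBM graph of 100 features split across 5 communities:

X_train, y_train, adj_train, X_test, y_test, adj_test, char_feat = gen_syn_data(
    n_classes=3,
    n_features=100,
    n_communities=5,
    probs=[0.5, 0.1],  # dense within communities, sparse between
    model='SBM',
    syn_method='sign',
)
print(X_train.shape, adj_train.shape)  # (600, 100) and (100, 100)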
Example #22
B = np.array([
    [0.6, 0.3, 0.3, 0.1, 0.1, 0.1],
    [0.3, 0.6, 0.3, 0.1, 0.1, 0.1],
    [0.3, 0.3, 0.6, 0.1, 0.1, 0.1],
    [0.1, 0.1, 0.1, 0.6, 0.3, 0.3],
    [0.1, 0.1, 0.1, 0.3, 0.6, 0.3],
    [0.1, 0.1, 0.1, 0.3, 0.3, 0.6],
])

n_per_comm = 100
ns = 6 * [n_per_comm]

n_trials = 20
probability_matrix = np.zeros((600, 600))
for _ in range(n_trials):
    adjacency, labels = sbm(ns, B, return_labels=True)
    lt = LeidenTree(trials=5, verbose=False, max_levels=2)
    lt.fit(adjacency)
    lt.estimate_parameters(adjacency)
    probability_matrix += lt.full_probability_matrix.values / n_trials

np.unique(probability_matrix)

#%%

node_data = lt.node_data
node_data.sort_values(["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))
sorted_adjacency = adjacency[np.ix_(node_data["adjacency_index"],
                                    node_data["adjacency_index"])]
fig, ax = plt.subplots(1, 1, figsize=(16, 16))
Example #23
        return pred_labels


from scipy.optimize import linear_sum_assignment
from sklearn.metrics import confusion_matrix


def compute_mcr(true_labels, pred_labels):
    # align predicted to true labels before computing the misclassification rate
    confusion = confusion_matrix(true_labels, pred_labels)
    row_inds, col_inds = linear_sum_assignment(confusion, maximize=True)
    mcr = 1 - (np.trace(confusion[row_inds][:, col_inds]) / np.sum(confusion))
    return mcr


n_replicates = 30
gammas = [24, 36, 48, 60, 64]
rows = []
for replicate in range(n_replicates):
    # sample data
    adj, labels = sbm(comm_sizes, B, directed=False, return_labels=True)

    # GMMoASE
    ase_pred_labels, ase_embedding = spectral_clustering(adj,
                                                         method="ase",
                                                         return_embedding=True)
    ase_pred_labels = remap_labels(labels, ase_pred_labels)
    mcr = compute_mcr(labels, ase_pred_labels)
    for gamma in gammas:
        rows.append({
            "mcr": mcr,
            "method": "GMMoASE",
            "gamma": gamma,
            "replicate": replicate
        })
Example #24
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from graspologic.simulations import sbm
from graspologic.plot import heatmap


def eig(A):
    # eigendecomposition with eigenvalues (and matching eigenvectors)
    # sorted in ascending order
    evals, evecs = np.linalg.eig(A)
    sort_inds = np.argsort(evals)
    evals = evals[sort_inds]
    evecs = evecs[:, sort_inds]
    return evals, evecs


#%%
B = np.array([[0.8, 0.05], [0.05, 0.8]])
A = sbm([10, 10], B)
heatmap(A)

#%%
sns.set_context("talk")
degrees = np.sum(A, axis=0)
D = np.diag(degrees)
L = D - A  # unnormalized graph Laplacian
evals, evecs = eig(L)
fig = plt.figure()
sns.scatterplot(y=evals, x=np.arange(len(evals)))

#%%
rows = []
for p in np.linspace(0, 0.8, 20):
    for i in range(10):
Example #25
        edgecolor="darkred",
        linewidth=1,
        linestyle=":",
        facecolor="none",
    )
    ax.add_patch(circle)

#%%

from graspologic.simulations import sample_edges, sbm
from graspologic.utils import cartprod
import seaborn as sns

n_per_comm = 50
B = np.array([[0.8, 0.1, 0.1], [0.1, 0.75, 0.05], [0.1, 0.05, 0.6]])
_, labels = sbm([n_per_comm, n_per_comm, n_per_comm], B, return_labels=True)
P = B[np.ix_(labels, labels)]
sns.heatmap(P)

#%%
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
true_eigvals = np.linalg.eigvalsh(P)
n_sims = 1000
all_estimated_eigvals = []
for i in range(n_sims):
    A = sample_edges(P, directed=False, loops=True)
    estimated_eigvals = np.linalg.eigvalsh(A)
    all_estimated_eigvals += list(estimated_eigvals)

sns.histplot(all_estimated_eigvals, ax=ax, stat="density")
Example #26
        dpi=dpi,
        bbox_inches=bbox_inches,
        transparent=transparent,
        pad_inches=pad_inches,
        facecolor=facecolor,
    )


set_theme()

n = [15, 15]
P1 = [[0.3, 0.1], [0.1, 0.7]]
P2 = [[0.3, 0.1], [0.1, 0.3]]

np.random.seed(8)
G1 = sbm(n, P1)
G2 = sbm(n, P2)

embedder = OmnibusEmbed(n_components=2)
Zhat = embedder.fit_transform([G1, G2])

print(Zhat.shape)  # (2, 30, 2): one 2d embedding per graph, 30 nodes each

Xhat1 = Zhat[0]
Xhat2 = Zhat[1]
Xhat_full = np.concatenate((Xhat1, Xhat2), axis=0)

colors = sns.color_palette("deep")

# Plot the points
fig, ax = plt.subplots(figsize=(8, 8))
Example #27
import numpy as np
np.random.seed(42)

from graspologic.simulations import sbm
from graspologic.plot import heatmap

# Start with some simple parameters
N = 1500  # Total number of nodes
n = N // 3  # Nodes per community
p, q = .3, .15
B = np.array([[p, p, q],
              [p, p, q],
              [q, q, p]])  # Our block probability matrix

# Make and visualize our Stochastic Block Model
A, labels = sbm([n, n, n], B, return_labels=True)
heatmap(A, title="A Stochastic Block Model");

There are three communities (we promise), but the first two are impossible to distinguish using our network alone. The third community is distinct: nodes belonging to it aren't likely to connect to nodes in the first two communities, and are very likely to connect to each other. If we wanted to embed this graph using our Laplacian or Adjacency Spectral Embedding methods, we'd find the first and second communities layered on top of each other.

from graspologic.embed import LaplacianSpectralEmbed as LSE
from graspologic.utils import to_laplacian
import matplotlib.pyplot as plt
import seaborn as sns


def plot_latents(latent_positions, *, title, labels, ax=None):
    if ax is None:
        ax = plt.gca()
    plot = sns.scatterplot(x=latent_positions[:, 0], y=latent_positions[:, 1],
                           hue=labels, palette="Set1", linewidth=0, s=10, ax=ax)
    plot.set_title(title)
    return plot