Exemplo n.º 1
0
def test_ase_three_blocks():
    """
    A three-block SBM, embedded with ASE and clustered with AutoGMMCluster,
    should yield a three-component model whose ``ari_`` equals 1.
    """
    np.random.seed(1)

    # Three equally sized communities and the matching ground-truth labels
    block_size = 50
    block_sizes = [block_size] * 3
    block_probs = np.array([[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]])
    true_labels = np.repeat([1, 2, 3], repeats=block_size)

    # Sample the graph, then estimate latent positions from it
    adjacency = sbm(n=block_sizes, p=block_probs)
    latent = AdjacencySpectralEmbed(n_components=5).fit_transform(adjacency)

    # Fit the clustering with the true labels supplied alongside the embedding
    clusterer = AutoGMMCluster(max_components=10)
    clusterer.fit(latent, true_labels)
    selected_k = clusterer.n_components_

    # The selected model must have exactly three components ...
    assert_equal(selected_k, 3)

    # ... and its reported ari_ must be 1 (perfect clustering)
    assert_allclose(clusterer.ari_, 1)
Exemplo n.º 2
0
 def setUp(self):
     """Seed NumPy's RNG, then build the four sample graphs and the embedder."""
     np.random.seed(9001)
     block_sizes = [10, 10]
     block_probs = np.array([[0.9, 0.1], [0.1, 0.9]])

     # One weight sampler per block, each paired with its own keyword arguments
     samplers = [[normal, poisson], [poisson, normal]]
     sampler_kwargs = [
         [{"loc": 3, "scale": 1}, {"lam": 5}],
         [{"lam": 5}, {"loc": 3, "scale": 1}],
     ]

     # Sampling order is kept fixed because every draw advances the seeded RNG
     undirected_unweighted = sbm(n=block_sizes, p=block_probs)
     undirected_weighted = sbm(
         n=block_sizes, p=block_probs, wt=samplers, wtargs=sampler_kwargs
     )
     directed_unweighted = sbm(n=block_sizes, p=block_probs, directed=True)
     directed_weighted = sbm(
         n=block_sizes,
         p=block_probs,
         wt=samplers,
         wtargs=sampler_kwargs,
         directed=True,
     )

     self.testgraphs = {
         "Guw": undirected_unweighted,
         "Gw": undirected_weighted,
         "Guwd": directed_unweighted,
         "Gwd": directed_weighted,
     }
     self.lse = LaplacianSpectralEmbed(n_components=2)
Exemplo n.º 3
0
 def test_unconnected_warning(self):
     """Fitting LSE on an SBM with zero between-block probability must warn."""
     block_sizes = [50, 50]
     # Within-block probability 1, between-block probability 0
     block_probs = [[1, 0], [0, 1]]
     adjacency = sbm(block_sizes, block_probs)
     with self.assertWarns(UserWarning):
         LaplacianSpectralEmbed().fit(adjacency)
Exemplo n.º 4
0
    def test_ase_three_blocks(self):
        """
        Over repeated draws of a three-block SBM, GaussianCluster fit on the
        ASE embedding should select three components with an ``ari_`` of 1.
        """
        np.random.seed(3)
        n_trials = 10

        # Three equally sized communities and the matching ground-truth labels
        block_size = 50
        block_sizes = [block_size] * 3
        block_probs = np.array(
            [[0.8, 0.3, 0.2], [0.3, 0.8, 0.3], [0.2, 0.3, 0.8]]
        )
        true_labels = np.repeat([1, 2, 3], repeats=block_size)

        for _ in range(n_trials):
            # Fresh graph and latent-position estimate for every trial
            adjacency = sbm(n=block_sizes, p=block_probs)
            embedder = AdjacencySpectralEmbed(n_components=5)
            latent = embedder.fit_transform(adjacency)

            # NOTE(review): only min_components is passed yet 3 components are
            # expected below - presumably GaussianCluster treats this value as
            # an upper bound when max_components is omitted; confirm.
            clusterer = GaussianCluster(min_components=10)
            clusterer.fit(latent, true_labels)
            selected_k = clusterer.n_components_

            # The three-component model must be the selected one ...
            assert_equal(selected_k, 3)

            # ... and its ari_ entry must be 1 (perfect clustering)
            assert_allclose(clusterer.ari_.loc[selected_k], 1)
Exemplo n.º 5
0
 def test_unconnected_warning(self):
     """Fitting LSE on a sparse SBM with zero between-block probability must warn."""
     block_sizes = [50, 50]
     # Within-block probability 1, between-block probability 0
     block_probs = [[1, 0], [0, 1]]
     sparse_adjacency = csr_matrix(sbm(block_sizes, block_probs))
     with pytest.warns(UserWarning):
         LaplacianSpectralEmbed().fit(sparse_adjacency)
Exemplo n.º 6
0
    def test_output_two_block_sbm(self):
        """The first elbow reported for a two-block SBM should be 2."""
        np.random.seed(10)
        block_probs = np.array([[0.5, 0.1], [0.1, 0.5]])
        block_sizes = [100, 100]
        graph = sbm(block_sizes, block_probs)

        # Only the elbow positions are checked; the second return value is unused
        elbow_positions, _unused = select_dimension(graph, n_elbows=2)
        first_elbow = elbow_positions[0]
        assert_equal(first_elbow, 2)
Exemplo n.º 7
0
def make_train_directed(n=None, m=10):
    """
    Sample a four-class training set of directed two-block SBM graphs.

    Parameters
    ----------
    n : list of int, optional
        Community sizes forwarded to ``sbm``. Defaults to ``[128, 128]``.
    m : int
        Number of graphs sampled from each of the four classes.

    Returns
    -------
    list
        ``4 * m`` graphs as returned by ``sbm``; within each of the ``m``
        rounds the graphs follow the class order p1, p2, p3, p4.
    """
    # A None sentinel avoids sharing one mutable default list across calls
    if n is None:
        n = [128, 128]

    # Block probability matrices, one per class
    p1 = [[0, 0.9], [0, 0]]
    p2 = [[0, 0], [0.9, 0]]
    p3 = [[0.9, 0.9], [0, 0]]
    p4 = [[0, 0], [0.9, 0.9]]
    class_probs = (p1, p2, p3, p4)

    return [sbm(n, p, directed=True) for _ in range(m) for p in class_probs]
Exemplo n.º 8
0
def test_sort_inds():
    """
    Check the ordering produced by ``_sort_inds`` for a two-level labelling.

    Expected behaviour under test:
    - outer blocks come first, ordered by number of vertices in the block;
    - inner categories are ordered by their vertex count across the entire
      graph (not within the outer block), so the inner ordering is consistent
      from one outer block to the next;
    - within a block, vertices are in descending degree order.
    """
    B = np.array(
        [
            [0, 0.2, 0.1, 0.1, 0.1],
            [0.2, 0.8, 0.1, 0.3, 0.1],
            [0.15, 0.1, 0, 0.05, 0.1],
            [0.1, 0.1, 0.2, 1, 0.1],
            [0.1, 0.2, 0.1, 0.1, 0.8],
        ]
    )

    g = sbm([10, 30, 50, 25, 25], B, directed=True)
    # Total degree of each vertex: column sums plus row sums of the adjacency
    degrees = g.sum(axis=0) + g.sum(axis=1)

    labels1 = np.array(
        10 * ["d"] + 30 * ["c"] + 50 * ["d"] + 25 * ["e"] + 25 * ["c"]
    )
    labels2 = np.array(40 * ["0"] + 100 * ["1"])
    sorted_inds = _sort_inds(g, labels1, labels2, True)

    labels1 = labels1[sorted_inds]
    labels2 = labels2[sorted_inds]
    degrees = degrees[sorted_inds]

    # Outer block order should be 1, 0 and inner block order d, c, e, which
    # gives the following runs of (positions, outer label, inner label).
    expected_runs = [
        (slice(0, 50), "1", "d"),
        (slice(50, 75), "1", "c"),
        (slice(75, 100), "1", "e"),
        (slice(100, 110), "0", "d"),
        (slice(110, None), "0", "c"),
    ]

    for run, outer, inner in expected_runs:
        # show that outer blocks are sorted correctly
        assert np.all(labels2[run] == outer), f"outer label mismatch in {run}"
        # show that inner blocks are sorted correctly
        assert np.all(labels1[run] == inner), f"inner label mismatch in {run}"
        # show that within block, everything is in descending degree order
        assert np.all(np.diff(degrees[run]) <= 0), f"degree order broken in {run}"
Exemplo n.º 9
0
    def _gen_mat_data(n: int = 20,
                      m: int = 20,
                      p: float = 0.50,
                      mat_type: str = 'sb',
                      binary: bool = False,
                      asfile: bool = True,
                      n_graphs: int = 1):
        """
        Generate random test graphs and optionally save each one as ``.npy``.

        Parameters
        ----------
        n : int
            First argument to ``er_nm``; single community size for ``sbm``.
        m : int
            Second argument to ``er_nm`` (unused for ``mat_type='sb'``).
        p : float
            Single block probability passed to ``sbm``; must not be None
            when ``mat_type='sb'``.
        mat_type : str
            ``'er'`` to sample with ``er_nm`` or ``'sb'`` to sample with
            ``sbm``.
        binary : bool
            When True, ``wt=1`` is passed to ``sbm``; otherwise
            ``np.random.uniform`` is used as the weight function.
        asfile : bool
            When True, every matrix is also saved to its own temporary
            ``.npy`` file, which is not deleted by this function.
        n_graphs : int
            Number of graphs to generate.

        Returns
        -------
        dict
            ``{'mat_list': [...], 'mat_file_list': [...]}``; the file list is
            empty unless ``asfile`` is True.

        Raises
        ------
        ValueError
            If ``mat_type`` is not recognized, or ``p`` is None for ``'sb'``.
        """
        if binary is True:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []
        for _ in range(n_graphs):
            if mat_type == 'er':
                # NOTE(review): this branch always uses uniform weights and
                # never consults `binary` - confirm that is intended.
                raw = er_nm(n,
                            m,
                            wt=np.random.uniform,
                            wtargs=dict(low=0, high=1))
            elif mat_type == 'sb':
                if p is None:
                    raise ValueError(
                        f"for mat_type {mat_type}, p cannot be None")
                raw = sbm(np.array([n]),
                          np.array([[p]]),
                          wt=wt,
                          wtargs=dict(low=0, high=1))
            else:
                raise ValueError(f"mat_type {mat_type} not recognized!")

            # Same clean-up pipeline for both graph types
            mat = largest_connected_component(symmetrize(remove_loops(raw)))
            mat_list.append(mat)

            if asfile is True:
                # Reserve a unique .npy path, then release the handle before
                # np.save reopens that path. This closes the handle even if
                # the save fails and avoids writing to a file that is still
                # open.
                with tempfile.NamedTemporaryFile(mode='w+',
                                                 suffix='.npy',
                                                 delete=False) as handle:
                    mat_path = str(handle.name)
                np.save(mat_path, mat)
                mat_file_list.append(mat_path)

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
Exemplo n.º 10
0
def _test_sbm_er_binary(self,
                        method,
                        P,
                        directed=False,
                        sparse=False,
                        *args,
                        **kwargs):
    """
    Compare how well k-means recovers a two-block labelling from embeddings
    of SBM samples versus ER samples, and require the SBM to win more often.

    ``method`` is the embedding class under test. It is instantiated with
    ``n_components=2`` and ``concat=directed``. ``P`` is the block probability
    matrix for the SBM. With ``sparse`` set, both graphs are converted to
    ``csr_matrix`` before embedding. ``*args`` and ``**kwargs`` are accepted
    but not used.
    """
    np.random.seed(8888)

    n_trials = 50
    n_vertices = 200
    n_blocks = 2
    block_sizes = [100, 100]

    # The shape assertions expect twice as many columns for directed graphs
    expected_cols = 2 * n_blocks if directed else n_blocks
    expected_shape = (n_vertices, expected_cols)

    sbm_wins = 0
    er_wins = 0
    for _ in range(n_trials):
        # Draw order (SBM first, then ER) is kept so the seeded RNG stream matches
        sbm_graph = sbm(block_sizes, P, directed=directed)
        er_graph = er_np(n_vertices, 0.5, directed=directed)
        if sparse:
            sbm_graph = csr_matrix(sbm_graph)
            er_graph = csr_matrix(er_graph)
        sbm_embedder = method(n_components=2, concat=directed)
        er_embedder = method(n_components=2, concat=directed)

        # First block labelled 0 and second block labelled 1, for both graphs
        sbm_truth = np.repeat(np.array([0, 1], dtype=np.int8), block_sizes)
        er_truth = np.repeat(np.array([0, 1], dtype=np.int8), block_sizes)

        sbm_embedding = sbm_embedder.fit_transform(sbm_graph)
        er_embedding = er_embedder.fit_transform(er_graph)

        self.assertEqual(sbm_embedding.shape, expected_shape)
        self.assertEqual(er_embedding.shape, expected_shape)

        scores = _kmeans_comparison((sbm_embedding, er_embedding),
                                    (sbm_truth, er_truth),
                                    n_blocks)
        # Ties count for neither side
        sbm_wins += scores[0] > scores[1]
        er_wins += scores[0] < scores[1]

    self.assertTrue(sbm_wins > er_wins)
Exemplo n.º 11
0
def make_train_undirected(n=None, m=10, alpha=1):
    """
    Make a 4-class training dataset of two-block SBM graphs.

    Every class uses a block probability matrix of ``0.25 + alpha * c``,
    where ``c`` is one of four 2x2 perturbation matrices.

    Parameters
    ----------
    n : list of int, optional
        Number of vertices per community, forwarded to ``sbm``. Defaults to
        ``[128, 128]``.
    m : int
        Number of graphs sampled from each class.
    alpha : float
        Scale applied to the class perturbation before it is added to the
        0.25 baseline.

    Returns
    -------
    list
        ``4 * m`` graphs as returned by ``sbm``; within each of the ``m``
        rounds the graphs follow the class order c1, c2, c3, c4.
    """
    # A None sentinel avoids sharing one mutable default list across calls
    if n is None:
        n = [128, 128]

    # Class-specific perturbations of the baseline probabilities
    c1 = np.array([[0.1, 0], [0, 0.1]])
    c2 = -1 * c1
    c3 = np.array([[0.1, 0], [0, 0]])
    c4 = np.array([[0, 0], [0, 0.1]])
    perturbations = (c1, c2, c3, c4)

    # The baseline is loop-invariant, so it is built once
    baseline = np.ones((2, 2)) * 0.25

    return [
        sbm(n, baseline + alpha * c)
        for _ in range(m)
        for c in perturbations
    ]
Exemplo n.º 12
0
    def test_embedding(self):
        """
        The out-of-sample LSE embedding of a held-out vertex should lie within
        ``tolerance`` of the first in-sample embedding row in both dimensions.
        """
        tolerance = 0.1
        community_size = 100
        block_probs = np.array([[0.8, 0.2], [0.2, 0.8]])
        graph, _labels = sbm(2 * [community_size],
                             block_probs,
                             return_labels=True)

        # Hold out vertex 0 and embed the remaining graph
        held_out_index = 0
        in_sample, held_out = remove_vertices(graph,
                                              indices=held_out_index,
                                              return_removed=True)

        embedder = LaplacianSpectralEmbed(n_components=2)
        in_sample_embedding = embedder.fit_transform(in_sample)
        oos_embedding = embedder.transform(held_out)

        # Compare coordinate by coordinate against the first in-sample row
        for dim in (0, 1):
            reference = in_sample_embedding[0][dim]
            self.assertTrue(
                reference - tolerance < oos_embedding[0][dim] < reference + tolerance)
Exemplo n.º 13
0
    def test_directed_correct_latent_positions(self):
        """
        Hold out one vertex per community of a directed three-block SBM and
        check the out-of-sample embeddings against the per-community means of
        the in-sample embeddings.

        For each side (left/right) of the directed embedding, the distance
        from each community mean to the held-out vertex's embedding on the
        same side must be smaller than the distance to its embedding on the
        other side.
        """
        # setup
        ase = AdjacencySpectralEmbed(n_components=3)
        P = np.array([[0.9, 0.1, 0.1], [0.3, 0.6, 0.1], [0.1, 0.5, 0.6]])
        M, labels = sbm([200, 200, 200], P, directed=True, return_labels=True)

        # one node from each community: position 0 plus every position where
        # the label differs from the previous one
        oos_idx = np.nonzero(np.r_[1, np.diff(labels)[:-1]])[0]
        # Remove all held-out labels in a single step. Popping them one at a
        # time shifts the later indices and is only correct when the labels
        # are block-sorted.
        labels = np.delete(labels, oos_idx)

        # Grab out-of-sample, fit, transform
        A, a = remove_vertices(M, indices=oos_idx, return_removed=True)
        latent_left, latent_right = ase.fit_transform(A)
        oos_left, oos_right = ase.transform(a)

        # (in-sample latent positions, same-side oos, other-side oos)
        sides = (
            (latent_left, oos_left, oos_right),
            (latent_right, oos_right, oos_left),
        )
        for latent, oos_same_side, oos_other_side in sides:
            # separate into communities
            df = pd.DataFrame(
                {
                    "Type": labels,
                    "Dimension 1": latent[:, 0],
                    "Dimension 2": latent[:, 1],
                    "Dimension 3": latent[:, 2],
                }
            )
            means = df.groupby("Type").mean()

            # The diagonal pairs community i's mean with held-out vertex i
            dist_same_side = np.diag(pairwise_distances(means, oos_same_side))
            dist_other_side = np.diag(pairwise_distances(means, oos_other_side))
            self.assertTrue(all(dist_same_side < dist_other_side))
Exemplo n.º 14
0
    def _gen_mat_data(n: int=20, m: int=20, p: float=0.50,
                      mat_type: str='sb', binary: bool=False,
                      asfile: bool=True, n_graphs: int=1,
                      lcc: bool=False, modality: str='func'):
        """
        Generate random test graphs and optionally save each one as ``.npy``
        under ``~/test_mats``.

        Parameters
        ----------
        n : int
            First argument to ``er_nm``; single community size for ``sbm``.
        m : int
            Second argument to ``er_nm`` (unused for ``mat_type='sb'``).
        p : float
            Single block probability passed to ``sbm``; must not be None
            when ``mat_type='sb'``.
        mat_type : str
            ``'er'`` to sample with ``er_nm`` or ``'sb'`` to sample with
            ``sbm``.
        binary : bool
            When True, ``wt=1`` is passed to ``sbm``; otherwise
            ``np.random.uniform`` is used as the weight function.
        asfile : bool
            When True, every matrix is also saved to disk and its path is
            collected in ``mat_file_list``.
        n_graphs : int
            Number of graphs to generate; nothing is generated when <= 0.
        lcc : bool
            When True, ``largest_connected_component`` is applied to each
            matrix.
        modality : str
            ``'func'`` or ``'dwi'``; selects the file-name template used when
            saving.

        Returns
        -------
        dict
            ``{'mat_list': [...], 'mat_file_list': [...]}``.

        Raises
        ------
        ValueError
            If ``mat_type`` is not recognized, ``p`` is None for ``'sb'``, or
            a file is to be saved for an unrecognized ``modality``.
        """
        if binary is True:
            wt = 1
        else:
            wt = np.random.uniform

        mat_list = []
        mat_file_list = []

        # range() is empty for n_graphs <= 0, so no separate guard is needed
        for _ in range(n_graphs):
            if mat_type == 'er':
                # NOTE(review): this branch always uses uniform weights and
                # never consults `binary` - confirm that is intended.
                raw = er_nm(n, m, wt=np.random.uniform,
                            wtargs=dict(low=0, high=1))
            elif mat_type == 'sb':
                if p is None:
                    raise ValueError(
                        f"for mat_type {mat_type}, p cannot be None")
                raw = sbm(np.array([n]), np.array([[p]]),
                          wt=wt, wtargs=dict(low=0, high=1))
            else:
                raise ValueError(f"mat_type {mat_type} not recognized!")

            mat = symmetrize(remove_loops(raw))

            if lcc is True:
                mat = largest_connected_component(mat)

            mat_list.append(autofix(mat))

            if asfile is True:
                if modality == 'func':
                    stem = ("graph_sub-999_modality-func_"
                            "model-corr_template-"
                            "MNI152_2mm_"
                            "parc_tol-6fwhm_hpass-"
                            "0Hz_"
                            "signal-mean_thrtype-prop_thr-")
                elif modality == 'dwi':
                    # NOTE(review): the dwi template still says
                    # "modality-func_"; this looks like a copy-paste slip but
                    # is kept because callers may match on the file name.
                    stem = ("graph_sub-999_modality-func_"
                            "model-csa_template-"
                            "MNI152_2mm_tracktype-local_"
                            "traversal-det_minlength-30_"
                            "tol-5_thrtype-prop_thr-")
                else:
                    # Previously this fell through to an unbound `mat_path`
                    raise ValueError(f"modality {modality} not recognized!")

                out_folder = f"{str(Path.home())}/test_mats"
                os.makedirs(out_folder, exist_ok=True)

                # NOTE(review): a threshold rounded to two decimals can repeat
                # across graphs, in which case an earlier file is overwritten.
                thr = round(random.uniform(0, 1), 2)
                mat_path = f"{out_folder}/{stem}{thr}.npy"

                # Save straight to the destination. The earlier version first
                # copied an empty, never-deleted temp file onto this path,
                # which leaked one temp file per graph and changed nothing in
                # the saved output.
                # NOTE(review): the raw `mat` is saved, not the `autofix`
                # result kept in mat_list - confirm that is intended.
                np.save(mat_path, mat)
                mat_file_list.append(mat_path)

        return {'mat_list': mat_list, 'mat_file_list': mat_file_list}
Exemplo n.º 15
0
import pytest
import numpy as np
from graspologic.embed.ase import AdjacencySpectralEmbed
from graspologic.simulations.simulations import sbm
from graspologic.nominate import SpectralVertexNomination

# global constants for tests (built once, at import time)
n_verts = 50
p = np.array([[0.7, 0.25, 0.2], [0.25, 0.8, 0.3], [0.2, 0.3, 0.85]])
labels = np.array([0] * n_verts + [1] * n_verts + [2] * n_verts)
# `np.int` was only an alias of the builtin `int` and has been removed from
# NumPy, so the builtin is used directly.
adj = np.array(sbm(3 * [n_verts], p), dtype=int)
embeder = AdjacencySpectralEmbed()
pre_embeded = embeder.fit_transform(adj)


def _nominate(X, seed, nominator=None, k=None):
    if nominator is None:
        nominator = SpectralVertexNomination(n_neighbors=k)
    nominator.fit(X)
    n_verts = X.shape[0]
    nom_list, dists = nominator.predict(seed)
    assert nom_list.shape == (n_verts, seed.shape[0])
    assert dists.shape == (n_verts, seed.shape[0])
    return nom_list


def _test_seed_input_dimensions():
    """A 3-D seed array passed to ``_nominate`` is expected to raise IndexError."""
    # NOTE(review): the leading underscore keeps pytest's default collection
    # from picking this function up - confirm that is intended.
    with pytest.raises(IndexError):
        # builtin int replaces the removed `np.int` alias
        _nominate(adj, np.zeros((5, 5, 5), dtype=int))