Exemplo n.º 1
0
def test_mnn_graph_matrix_gamma():
    """Check the MNN graph against a hand-built kernel when `gamma` is a matrix.

    The reference kernel is assembled block by block (one block per ordered
    pair of samples) with an alpha-decaying kernel, then symmetrized with a
    per-sample-pair blend of the elementwise min and max weighted by `gamma`.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8
    # gamma[i, j] weights min(K, K.T) against max(K, K.T) for samples i and j
    gamma = np.array([
        [1, bs],
        [bs, 1],
    ])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    # np.unique returns the labels sorted, which fixes the row/column order
    # used to index `gamma` below
    samples = np.unique(sample_idx)

    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # observations in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, k]  # bandwidth: sorted distance at index k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize by bandwidth
            k_ij = np.exp(-1 * (pdxe_ij**a))  # apply alpha-decaying kernel
            if si == sj:
                # within-sample block (on the block diagonal of K)
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
                    (1 - beta)
            else:
                # between-sample block (off the block diagonal of K)
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    K = np.array(K)

    # expand the per-sample-pair gamma into a per-observation-pair matrix;
    # iterate the sorted `samples` rather than a set so that index ix always
    # refers to the same sample as in the kernel construction above
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = gamma[ix,
                                                                          jx]

    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(X,
                          knn=k + 1,
                          decay=a,
                          beta=1 - beta,
                          kernel_symm='gamma',
                          gamma=gamma,
                          distance=metric,
                          sample_idx=sample_idx,
                          thresh=0,
                          use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
Exemplo n.º 2
0
def test_knn_interpolate():
    """Interpolating via `Y` and via precomputed transitions must agree."""
    graph = build_graph(data, decay=None)
    # called with neither `Y` nor `transitions`: expected to raise ValueError
    assert_raises(ValueError, graph.interpolate, data)
    reduced = PCA(2).fit_transform(data)
    transitions = graph.extend_to_data(data)
    from_y = graph.interpolate(reduced, Y=data)
    from_transitions = graph.interpolate(reduced, transitions=transitions)
    assert np.all(from_y == from_transitions)
Exemplo n.º 3
0
def test_transform_sparse_adaptive_pca():
    """Exercise `transform` on sparse graphs built with `n_pca=True`.

    Covers the round trip on the graph's own data, the error message raised
    for inputs with the wrong number of columns, and agreement between graphs
    built with `n_pca=True`, with an explicit `rank_threshold`, and with an
    explicit integer `n_pca`.
    """
    adaptive = build_graph(data, sparse=True, n_pca=True, random_state=42)
    assert np.all(adaptive.data_nu == adaptive.transform(adaptive.data))

    # a single column is not enough to project onto the graph's components
    one_column_msg = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        adaptive.data.shape[0], adaptive.data.shape[1]
    )
    with assert_raises_message(ValueError, one_column_msg):
        adaptive.transform(sp.csr_matrix(adaptive.data)[:, 0])

    # neither are the first fifteen columns
    fifteen_column_msg = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        adaptive.data.shape[0], adaptive.data.shape[1]
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        adaptive.transform(sp.csr_matrix(adaptive.data)[:, :15])

    # same rank threshold -> same reduced data
    by_threshold = build_graph(
        data,
        sparse=True,
        n_pca=True,
        rank_threshold=adaptive.rank_threshold,
        random_state=42,
    )
    assert np.allclose(by_threshold.data_nu,
                       by_threshold.transform(by_threshold.data))
    assert np.allclose(by_threshold.data_nu, adaptive.transform(adaptive.data))

    # explicit component count taken from the threshold-built graph
    by_count = build_graph(
        data, sparse=True, n_pca=by_threshold.n_pca, random_state=42
    )
    assert np.allclose(by_count.data_nu, by_count.transform(by_count.data))
    assert np.allclose(by_count.data_nu,
                       by_threshold.transform(by_threshold.data))
Exemplo n.º 4
0
def test_knn_graph():
    """Check the kNN graph from `build_graph` against a dense reference.

    The reference adjacency connects i -> j whenever the PCA-space distance
    from i to j is within row i's neighbourhood radius, and is then
    symmetrized by averaging with its transpose.
    """
    k = 3
    n_pca = 20
    pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data)
    data_nu = pca.transform(data)
    pdx = squareform(pdist(data_nu, metric='euclidean'))
    # radius per row: the largest of the k smallest distances in that row
    knn_dist = np.partition(pdx, k, axis=1)[:, :k]
    epsilon = np.max(knn_dist, axis=1)
    # broadcasting epsilon down the rows builds the 0/1 kernel in one step,
    # instead of filling an uninitialised buffer row by row
    K = (pdx <= epsilon[:, np.newaxis]).astype(pdx.dtype)

    K = K + K.T
    W = np.divide(K, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = build_graph(data, n_pca=n_pca,
                     decay=None, knn=k, random_state=42,
                     use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.kNNGraph)
Exemplo n.º 5
0
def test_inverse_transform_dense_no_pca():
    """Exercise `inverse_transform` on a dense graph built with `n_pca=None`.

    Checks column-subset and full round trips, then the error messages
    raised for 1-D, 3-D and wrong-width inputs.
    """
    graph = build_graph(data, n_pca=None)

    # round trip restricted to two columns, then over all columns
    recovered_cols = graph.inverse_transform(graph.data_nu, columns=[5, 6])
    np.testing.assert_allclose(data[:, 5:7], recovered_cols, atol=1e-12)
    assert np.all(graph.data == graph.inverse_transform(graph.data_nu))

    # 1-D input
    flat_msg = "data of shape ({0},) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, flat_msg):
        graph.inverse_transform(graph.data[:, 0])

    # 3-D input
    cube_msg = "data of shape ({0}, 1, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, cube_msg):
        graph.inverse_transform(graph.data[:, None, :15])

    # 2-D input with too few columns
    narrow_msg = "data of shape ({0}, 15) cannot be inverse transformed from graph built on reduced data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, narrow_msg):
        graph.inverse_transform(graph.data[:, :15])
Exemplo n.º 6
0
def test_knnmax():
    """Check that `knn_max` caps the number of kernel neighbours per point.

    First verifies that, with `kernel_symm=None`, every kernel row has
    exactly `k_max` positive entries. Then compares the default-symmetrized
    graph with a dense reference kernel that is zeroed beyond each row's
    `k_max`-neighbour radius.
    """
    data = datasets.make_swiss_roll()[0]
    k = 5
    k_max = 10
    a = 0.45
    thresh = 0

    with warnings.catch_warnings():
        # presumably emitted because kernel_symm=None leaves K asymmetric
        warnings.filterwarnings("ignore", "K should be symmetric",
                                RuntimeWarning)
        G = build_graph(
            data,
            n_pca=None,
            decay=a,
            knn=k - 1,
            knn_max=k_max - 1,
            thresh=thresh,
            random_state=42,
            kernel_symm=None,
        )
        assert np.all((G.K > 0).sum(axis=1) == k_max)

    pdx = squareform(pdist(data, metric="euclidean"))
    knn_dist = np.partition(pdx, k, axis=1)[:, :k]
    # per-row cut-off: the largest of the k_max smallest distances
    knn_max_dist = np.max(np.partition(pdx, k_max, axis=1)[:, :k_max], axis=1)
    epsilon = np.max(knn_dist, axis=1)
    pdx_scale = (pdx.T / epsilon).T
    K = np.where(pdx <= knn_max_dist[:, None], np.exp(-1 * pdx_scale**a), 0)
    K = K + K.T
    W = np.divide(K, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = build_graph(
        data,
        n_pca=None,
        decay=a,
        knn=k - 1,
        knn_max=k_max - 1,
        thresh=thresh,
        random_state=42,
        use_pygsp=True,
    )
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    assert G.N == G2.N
    assert np.all(G.dw == G2.dw)
    assert (G.W - G2.W).nnz == 0
Exemplo n.º 7
0
def test_inverse_transform_dense_no_pca():
    """Exercise `inverse_transform` on a dense graph built with `n_pca=None`.

    Checks column-subset and full round trips, then that 1-D, 3-D and
    wrong-width inputs raise ValueError.
    """
    graph = build_graph(data, n_pca=None)
    recovered_cols = graph.inverse_transform(graph.data_nu, columns=[5, 6])
    np.testing.assert_allclose(data[:, 5:7], recovered_cols, atol=1e-12)
    assert np.all(graph.data == graph.inverse_transform(graph.data_nu))

    # index expressions producing a 1-D, a 3-D and a too-narrow 2-D input
    for bad_index in (np.s_[:, 0], np.s_[:, None, :15], np.s_[:, :15]):
        assert_raises(ValueError, graph.inverse_transform,
                      graph.data[bad_index])
Exemplo n.º 8
0
def test_truncated_exact_graph_no_pca():
    """Check thresholded exact graphs (no PCA) against a dense reference.

    The same random subsample is passed once as a dense array and once as a
    CSR matrix; both graphs must match the reference exactly.
    """
    k = 3
    a = 13
    n_pca = None
    thresh = 1e-4
    subset = np.random.choice(len(data), len(data) // 10, replace=False)
    data_small = data[subset]

    # reference: alpha-decay kernel, thresholded, averaged with its transpose
    pdx = squareform(pdist(data_small, metric='euclidean'))
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    scaled = pdx / epsilon[:, np.newaxis]
    K = np.exp(-1 * scaled**a)
    K[K < thresh] = 0
    W = np.divide(K + K.T, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    graph_kwargs = dict(
        thresh=thresh,
        graphtype='exact',
        n_pca=n_pca,
        decay=a,
        knn=k,
        random_state=42,
        use_pygsp=True,
    )
    # dense input first, then the same data wrapped as CSR
    for as_input in (lambda arr: arr, sp.csr_matrix):
        G2 = build_graph(as_input(data_small), **graph_kwargs)
        assert G.N == G2.N
        assert np.all(G.d == G2.d)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)
Exemplo n.º 9
0
def test_exact_interpolate():
    """Interpolating via `Y` and via precomputed transitions must agree."""
    graph = build_graph(data, decay=10, thresh=0)
    # called with neither `transitions` nor `Y`
    missing_args_msg = "Either `transitions` or `Y` must be provided."
    with assert_raises_message(ValueError, missing_args_msg):
        graph.interpolate(data)
    reduced = PCA(2).fit_transform(data)
    transitions = graph.extend_to_data(data)
    from_y = graph.interpolate(reduced, Y=data)
    from_transitions = graph.interpolate(reduced, transitions=transitions)
    assert np.all(from_y == from_transitions)
Exemplo n.º 10
0
def test_transform_sparse_pca():
    """Exercise `transform` on a sparse graph built with `n_pca=20`.

    Checks the round trip on the graph's own data and the error messages
    raised for sparse inputs with the wrong number of columns.
    """
    graph = build_graph(data, sparse=True, n_pca=20)
    assert np.all(graph.data_nu == graph.transform(graph.data))

    # single-column sparse input
    one_column_msg = "data of shape ({0}, 1) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, one_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, 0])

    # fifteen-column sparse input
    fifteen_column_msg = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1}). Expected shape ({0}, {1})".format(
        graph.data.shape[0], graph.data.shape[1]
    )
    with assert_raises_message(ValueError, fifteen_column_msg):
        graph.transform(sp.csr_matrix(graph.data)[:, :15])
Exemplo n.º 11
0
def test_transform_sparse_adaptive_pca():
    """Exercise `transform` on sparse graphs built with `n_pca=True`.

    Covers the round trip on the graph's own data, rejection of inputs with
    the wrong number of columns, and agreement between graphs built with
    `n_pca=True`, with an explicit `rank_threshold`, and with an explicit
    integer `n_pca`.
    """
    adaptive = build_graph(data, sparse=True, n_pca=True, random_state=42)
    assert np.all(adaptive.data_nu == adaptive.transform(adaptive.data))

    # one column, then the first fifteen columns: both must be rejected
    for bad_cols in (0, slice(None, 15)):
        assert_raises(ValueError, adaptive.transform,
                      sp.csr_matrix(adaptive.data)[:, bad_cols])

    # same rank threshold -> same reduced data
    by_threshold = build_graph(data,
                               sparse=True,
                               n_pca=True,
                               rank_threshold=adaptive.rank_threshold,
                               random_state=42)
    assert np.allclose(by_threshold.data_nu,
                       by_threshold.transform(by_threshold.data))
    assert np.allclose(by_threshold.data_nu, adaptive.transform(adaptive.data))

    # explicit component count taken from the threshold-built graph
    by_count = build_graph(data,
                           sparse=True,
                           n_pca=by_threshold.n_pca,
                           random_state=42)
    assert np.allclose(by_count.data_nu, by_count.transform(by_count.data))
    assert np.allclose(by_count.data_nu,
                       by_threshold.transform(by_threshold.data))
Exemplo n.º 12
0
def test_mnn_with_non_zero_indexed_sample_idx():
    """Shifting every sample label by one must not change the MNN graph."""
    X, sample_idx = generate_swiss_roll()
    G = build_graph(X,
                    sample_idx=sample_idx,
                    kernel_symm='gamma',
                    gamma=0.5,
                    n_pca=None,
                    use_pygsp=True)
    # build a new array instead of `sample_idx += 1`: the in-place form would
    # also rewrite the labels of any reference the first graph kept to its
    # input, so the two graphs would no longer be built from distinct labels
    shifted_idx = sample_idx + 1
    G2 = build_graph(X,
                     sample_idx=shifted_idx,
                     kernel_symm='gamma',
                     gamma=0.5,
                     n_pca=None,
                     use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
Exemplo n.º 13
0
def test_transform_dense_no_pca():
    """Exercise `transform` on a dense graph built with `n_pca=None`.

    Checks the round trip on the graph's own data, then the error messages
    raised for 1-D, 3-D and wrong-width inputs.
    """
    graph = build_graph(data, n_pca=None)
    assert np.all(graph.data_nu == graph.transform(graph.data))

    # 1-D input
    flat_msg = "data of shape ({0},) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, flat_msg):
        graph.transform(graph.data[:, 0])

    # 3-D input
    cube_msg = "data of shape ({0}, 1, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, cube_msg):
        graph.transform(graph.data[:, None, :15])

    # 2-D input with too few columns
    narrow_msg = "data of shape ({0}, 15) cannot be transformed to graph built on data of shape ({0}, {1})".format(
        data.shape[0], data.shape[1]
    )
    with assert_raises_message(ValueError, narrow_msg):
        graph.transform(graph.data[:, :15])
Exemplo n.º 14
0
def test_mnn_with_string_sample_idx():
    """Relabelling samples with strings must not change the MNN graph."""
    X, sample_idx = generate_swiss_roll()
    # everything except the sample labels is shared between the two graphs
    shared_kwargs = dict(
        kernel_symm="mnn",
        theta=0.5,
        n_pca=None,
        use_pygsp=True,
    )
    G = build_graph(X, sample_idx=sample_idx, **shared_kwargs)

    # label 0 becomes "a", every other label becomes "b"
    sample_idx = np.where(sample_idx == 0, "a", "b")
    G2 = build_graph(X, sample_idx=sample_idx, **shared_kwargs)

    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
Exemplo n.º 15
0
def test_truncated_exact_graph_sparse():
    """Check thresholded exact graphs built from sparse inputs.

    A dense reference graph is computed from TruncatedSVD-reduced data and
    compared with graphs built from sparse raw data (COO), a precomputed
    distance matrix (BSR), a precomputed affinity matrix (LIL) and a
    precomputed adjacency matrix (DOK).
    """
    k = 3
    a = 13
    n_pca = 20
    thresh = 1e-4
    subset = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[subset]

    # reference: alpha-decay kernel, thresholded, averaged with its transpose
    pca = TruncatedSVD(n_pca, random_state=42).fit(data_small)
    data_small_nu = pca.transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    scaled = pdx / epsilon[:, np.newaxis]
    K = np.exp(-1 * scaled**a)
    K[K < thresh] = 0
    W = np.divide(K + K.T, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    # sparse raw data: weights are compared with a tolerance, not exactly
    G2 = build_graph(sp.coo_matrix(data_small),
                     thresh=thresh,
                     graphtype='exact',
                     n_pca=n_pca,
                     decay=a,
                     knn=k,
                     random_state=42,
                     use_pygsp=True)
    assert G.N == G2.N
    np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)

    # precomputed inputs: (sparse constructor, dense source, extra kwargs)
    precomputed_cases = (
        (sp.bsr_matrix, pdx,
         dict(precomputed='distance', thresh=thresh, decay=a, knn=k)),
        (sp.lil_matrix, K,
         dict(precomputed='affinity', thresh=thresh)),
        (sp.dok_matrix, W,
         dict(precomputed='adjacency')),
    )
    for to_sparse, dense_source, extra_kwargs in precomputed_cases:
        G2 = build_graph(to_sparse(dense_source),
                         n_pca=None,
                         random_state=42,
                         use_pygsp=True,
                         **extra_kwargs)
        assert G.N == G2.N
        assert np.all(G.d == G2.d)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)
Exemplo n.º 16
0
def test_transform_dense_no_pca():
    """Exercise `transform` on a dense graph built with `n_pca=None`.

    Checks the round trip on the graph's own data, then that 1-D, 3-D and
    wrong-width inputs raise ValueError.
    """
    graph = build_graph(data, n_pca=None)
    assert np.all(graph.data_nu == graph.transform(graph.data))

    # index expressions producing a 1-D, a 3-D and a too-narrow 2-D input
    for bad_index in (np.s_[:, 0], np.s_[:, None, :15], np.s_[:, :15]):
        assert_raises(ValueError, graph.transform, graph.data[bad_index])
Exemplo n.º 17
0
def test_exact_graph():
    """Check exact (unthresholded) graphs against a dense reference.

    The reference is computed from PCA-reduced data and compared with graphs
    built from the raw data, a precomputed distance matrix, a precomputed
    affinity matrix (sparse and dense) and a precomputed adjacency matrix.
    """
    k = 3
    a = 13
    n_pca = 20
    subset = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[subset]

    # reference: alpha-decay kernel averaged with its transpose
    pca = PCA(n_pca, svd_solver='randomized', random_state=42).fit(data_small)
    data_small_nu = pca.transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    scaled = pdx / epsilon[:, np.newaxis]
    K = np.exp(-1 * scaled**a)
    W = np.divide(K + K.T, 2)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    # (input factory, graph kwargs); factories defer construction so each
    # input is created immediately before its graph is built
    cases = (
        (lambda: data_small,
         dict(thresh=0, n_pca=n_pca, decay=a, knn=k)),
        (lambda: pdx,
         dict(n_pca=None, precomputed='distance', decay=a, knn=k)),
        (lambda: sp.coo_matrix(K),
         dict(n_pca=None, precomputed='affinity')),
        (lambda: K,
         dict(n_pca=None, precomputed='affinity')),
        (lambda: W,
         dict(n_pca=None, precomputed='adjacency')),
    )
    for make_input, graph_kwargs in cases:
        G2 = build_graph(make_input(),
                         random_state=42,
                         use_pygsp=True,
                         **graph_kwargs)
        assert G.N == G2.N
        assert np.all(G.d == G2.d)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)
Exemplo n.º 18
0
def test_transform_sparse_pca():
    """Exercise `transform` on a sparse graph built with `n_pca=20`.

    Checks the round trip on the graph's own data and that sparse inputs
    with the wrong number of columns raise ValueError.
    """
    graph = build_graph(data, sparse=True, n_pca=20)
    assert np.all(graph.data_nu == graph.transform(graph.data))

    # one column, then the first fifteen columns: both must be rejected
    for bad_cols in (0, slice(None, 15)):
        assert_raises(ValueError, graph.transform,
                      sp.csr_matrix(graph.data)[:, bad_cols])