示例#1
0
def test_sparse_alpha_knn_graph():
    """Thresholded alpha-decay kNN graph should stay close to a dense reference.

    The reference adjacency is computed from all pairwise distances with an
    alpha-decay kernel and averaged with its transpose. The graph returned
    by ``build_graph`` with a non-zero ``thresh`` must agree with it
    entrywise to within that threshold.
    """
    n_neighbors = 5
    alpha = 0.45
    tolerance = 0.01
    scale = 1.3
    data = datasets.make_swiss_roll()[0]

    distances = squareform(pdist(data, metric="euclidean"))
    # Per-row bandwidth: the largest of the n_neighbors smallest distances
    # in that row, stretched by the bandwidth scale.
    nearest = np.partition(distances, n_neighbors, axis=1)[:, :n_neighbors]
    bandwidth = nearest.max(axis=1) * scale
    kernel = np.exp(-1 * np.power(distances / bandwidth[:, None], alpha))
    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    reference = pygsp.graphs.Graph(adjacency)

    built = build_graph(
        data,
        n_pca=None,
        decay=alpha,
        knn=n_neighbors - 1,
        thresh=tolerance,
        bandwidth_scale=scale,
        random_state=42,
        use_pygsp=True,
    )

    assert np.abs(reference.W - built.W).max() < tolerance
    assert reference.N == built.N
    assert isinstance(built, graphtools.graphs.kNNGraph)
示例#2
0
def test_knn_graph():
    """Binary kNN graph from ``build_graph`` must equal a brute-force reference.

    The reference marks, for every row of the PCA-space distance matrix, the
    entries within that row's radius, then averages the result with its
    transpose and clears the diagonal.
    """
    n_neighbors = 3
    n_components = 20
    reducer = PCA(n_components, svd_solver='randomized', random_state=42)
    reduced = reducer.fit(data).transform(data)
    distances = squareform(pdist(reduced, metric='euclidean'))
    # Radius per row: largest of the n_neighbors smallest distances.
    radius = np.partition(
        distances, n_neighbors, axis=1)[:, :n_neighbors].max(axis=1)
    # 1 where an entry lies within its row's radius, 0 elsewhere.
    kernel = np.where(distances <= radius[:, None], 1.0, 0.0)

    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    reference = pygsp.graphs.Graph(adjacency)
    built = build_graph(
        data,
        n_pca=n_components,
        decay=None,
        knn=n_neighbors,
        random_state=42,
        use_pygsp=True,
    )

    assert reference.N == built.N
    assert np.all(reference.d == built.d)
    assert (reference.W != built.W).nnz == 0
    assert (built.W != reference.W).sum() == 0
    assert isinstance(built, graphtools.graphs.kNNGraph)
示例#3
0
def test_mnn_graph_matrix_gamma():
    """Compare an MNN graph built with a matrix ``gamma`` to a reference.

    A reference kernel is assembled block by block (one block per ordered
    pair of samples) with an alpha-decay kernel, then symmetrized entrywise
    as ``gamma * min(K, K.T) + (1 - gamma) * max(K, K.T)`` where ``gamma``
    is expanded from the per-sample-pair matrix to one value per entry.
    The result must equal the graph returned by ``graphtools.Graph`` with
    ``kernel_symm='gamma'``.
    """
    X, sample_idx = generate_swiss_roll()
    bs = 0.8  # gamma weight applied between two different samples
    gamma = np.array([
        [1, bs],
        [bs, 1],
    ])
    k = 10
    a = 20
    metric = 'euclidean'
    beta = 0
    # np.unique returns the labels sorted, which gives a deterministic
    # order for mapping samples onto rows/columns of ``gamma`` below.
    samples = np.unique(sample_idx)

    # Start from an all-NaN frame; every block is overwritten in the loop.
    K = pd.DataFrame(np.full((len(X), len(X)), np.nan))

    for si in samples:
        X_i = X[sample_idx == si]  # observations in sample i
        for sj in samples:
            X_j = X[sample_idx == sj]  # observations in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, k]  # bandwidth: sorted distance at index k
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize each row
            k_ij = np.exp(-1 * (pdxe_ij**a))  # alpha-decaying kernel
            if si == sj:
                # within-sample (diagonal) block, scaled by (1 - beta)
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * (1 - beta)
            else:
                # cross-sample (off-diagonal) block, stored as computed
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    K = np.array(K)

    # Expand the per-sample-pair gamma into one value per kernel entry.
    # Iterate over the sorted ``samples`` rather than ``set(sample_idx)``,
    # whose iteration order is not guaranteed.
    matrix_gamma = pd.DataFrame(np.zeros((len(sample_idx), len(sample_idx))))
    for ix, si in enumerate(samples):
        for jx, sj in enumerate(samples):
            matrix_gamma.iloc[sample_idx == si, sample_idx == sj] = gamma[ix, jx]

    W = np.array((matrix_gamma * np.minimum(K, K.T)) +
                 ((1 - matrix_gamma) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(X,
                          knn=k + 1,
                          decay=a,
                          beta=1 - beta,
                          kernel_symm='gamma',
                          gamma=gamma,
                          distance=metric,
                          sample_idx=sample_idx,
                          thresh=0,
                          use_pygsp=True)
    assert G.N == G2.N
    assert np.all(G.d == G2.d)
    assert (G.W != G2.W).nnz == 0
    assert (G2.W != G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.MNNGraph)
示例#4
0
def test_knn_graph_sparse():
    """Binary kNN graph built from sparse input must match a dense reference.

    The reference is computed from a ``TruncatedSVD`` projection of ``data``;
    the graph under test is built by ``build_graph`` from
    ``sp.coo_matrix(data)``.
    """
    n_neighbors = 3
    n_components = 20
    svd = TruncatedSVD(n_components, random_state=42).fit(data)
    reduced = svd.transform(data)
    distances = squareform(pdist(reduced, metric="euclidean"))
    # Radius per row: largest of the n_neighbors smallest distances.
    radius = np.partition(
        distances, n_neighbors, axis=1)[:, :n_neighbors].max(axis=1)
    # 1 where an entry lies within its row's radius, 0 elsewhere.
    kernel = np.where(distances <= radius[:, None], 1.0, 0.0)

    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    reference = pygsp.graphs.Graph(adjacency)
    built = build_graph(
        sp.coo_matrix(data),
        n_pca=n_components,
        decay=None,
        knn=n_neighbors - 1,
        random_state=42,
        use_pygsp=True,
    )

    assert reference.N == built.N
    np.testing.assert_allclose(built.W.toarray(), reference.W.toarray())
    assert isinstance(built, graphtools.graphs.kNNGraph)
示例#5
0
def test_knn_graph_anisotropy():
    """Check the anisotropy normalization of a thresholded kNN graph.

    A reference adjacency is built from PCA-space pairwise distances with an
    alpha-decay kernel, truncated below ``thresh``, averaged with its
    transpose and divided by ``outer(d, d) ** anisotropy`` where ``d`` holds
    the kernel row sums. It is compared (to 1e-14) with the graph returned
    by ``build_graph`` for the same parameters.
    """
    k = 3
    a = 13
    n_pca = 20
    anisotropy = 0.9
    thresh = 1e-4
    # random half of the rows of the module-level ``data`` (no replacement)
    data_small = data[np.random.choice(len(data), len(data) // 2, replace=False)]
    pca = PCA(n_pca, svd_solver="randomized", random_state=42).fit(data_small)
    data_small_nu = pca.transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric="euclidean"))
    # the k smallest distances of every row (unordered)
    knn_dist = np.partition(pdx, k, axis=1)[:, :k]
    # per-row bandwidth: the largest of those k distances
    epsilon = np.max(knn_dist, axis=1)
    # divide row i of pdx by epsilon[i]
    weighted_pdx = (pdx.T / epsilon).T
    K = np.exp(-1 * weighted_pdx ** a)
    # drop kernel values below the threshold before symmetrizing
    K[K < thresh] = 0
    K = K + K.T
    K = np.divide(K, 2)
    # row sums of the symmetrized kernel
    d = K.sum(1)
    # anisotropic normalization: K[i, j] / (d[i] * d[j]) ** anisotropy
    W = K / (np.outer(d, d) ** anisotropy)
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = build_graph(
        data_small,
        n_pca=n_pca,
        thresh=thresh,
        decay=a,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
        anisotropy=anisotropy,
    )
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    assert G.N == G2.N
    np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14, rtol=1e-14)
    np.testing.assert_allclose((G2.W - G.W).data, 0, atol=1e-14, rtol=1e-14)
示例#6
0
def test_knn_graph_multiplication_symm():
    """Binary kNN graph with ``kernel_symm="*"`` must match a reference.

    The reference multiplies the 0/1 neighbourhood kernel entrywise with
    its transpose and clears the diagonal.
    """
    n_neighbors = 3
    n_components = 20
    reducer = PCA(n_components, svd_solver="randomized", random_state=42)
    reduced = reducer.fit(data).transform(data)
    distances = squareform(pdist(reduced, metric="euclidean"))
    # Radius per row: largest of the n_neighbors smallest distances.
    radius = np.partition(
        distances, n_neighbors, axis=1)[:, :n_neighbors].max(axis=1)
    # 1 where an entry lies within its row's radius, 0 elsewhere.
    kernel = np.where(distances <= radius[:, None], 1.0, 0.0)

    adjacency = kernel * kernel.T
    np.fill_diagonal(adjacency, 0)
    reference = pygsp.graphs.Graph(adjacency)
    built = build_graph(
        data,
        n_pca=n_components,
        decay=None,
        knn=n_neighbors - 1,
        random_state=42,
        use_pygsp=True,
        kernel_symm="*",
    )

    assert reference.N == built.N
    np.testing.assert_equal(reference.dw, built.dw)
    assert (reference.W - built.W).nnz == 0
    assert (built.W - reference.W).sum() == 0
    assert isinstance(built, graphtools.graphs.kNNGraph)
示例#7
0
def test_knn_graph_fixed_bandwidth():
    """kNN graph with a user-supplied bandwidth must match a reference.

    Two cases are checked: a single scalar bandwidth, then one randomly
    drawn bandwidth per row of ``data``. In both, the reference kernel is
    truncated below ``thresh``, averaged with its transpose and stripped
    of its diagonal.
    """
    decay = 5
    bandwidth_scale = 1.3
    n_pca = 20
    thresh = 1e-4
    k = None

    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_nu = reducer.fit(data).transform(data)
    pdx = squareform(pdist(data_nu, metric="euclidean"))

    def reference_graph(kernel):
        # Truncate small values, symmetrize by averaging, clear diagonal.
        kernel[kernel < thresh] = 0
        adjacency = (kernel + kernel.T) / 2
        np.fill_diagonal(adjacency, 0)
        return pygsp.graphs.Graph(adjacency)

    def assert_weights_close(expected, actual):
        # Stored entries of the weight difference must all be ~0.
        difference = (expected.W - actual.W).data
        np.testing.assert_allclose(difference,
                                   np.zeros_like(difference),
                                   atol=1e-14)

    # --- scalar bandwidth ---
    bandwidth = 10
    G = reference_graph(
        np.exp(-1 * np.power(pdx / (bandwidth * bandwidth_scale), decay)))
    G2 = build_graph(
        data,
        n_pca=n_pca,
        decay=decay,
        bandwidth=bandwidth,
        bandwidth_scale=bandwidth_scale,
        knn=k,
        random_state=42,
        thresh=thresh,
        search_multiplier=2,
        use_pygsp=True,
    )
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    np.testing.assert_array_equal(G.N, G2.N)
    np.testing.assert_array_equal(G.d, G2.d)
    assert_weights_close(G, G2)

    # --- one bandwidth per row ---
    bandwidth = np.random.gamma(20, 0.5, len(data))
    per_row = (bandwidth * bandwidth_scale)[:, None]
    G = reference_graph(np.exp(-1 * (pdx / per_row) ** decay))
    G2 = build_graph(
        data,
        n_pca=n_pca,
        decay=decay,
        bandwidth=bandwidth,
        bandwidth_scale=bandwidth_scale,
        knn=k,
        random_state=42,
        thresh=thresh,
        use_pygsp=True,
    )
    assert isinstance(G2, graphtools.graphs.kNNGraph)
    np.testing.assert_array_equal(G.N, G2.N)
    np.testing.assert_allclose(G.dw, G2.dw, atol=1e-14)
    assert_weights_close(G, G2)
示例#8
0
def test_shortest_path_constant():
    """``shortest_path(distance="constant")`` must agree with
    ``graph_shortest_path`` run on the graph's kernel."""
    picked = np.random.choice(len(data), len(data) // 4, replace=False)
    graph = build_graph(data[picked], knn=5, decay=None)
    expected = graph_shortest_path(graph.K)
    # The reference routine reports 0 where no path exists; the graph
    # method is expected to report infinity there.
    expected[expected == 0] = np.inf
    # A node is at distance zero from itself.
    np.fill_diagonal(expected, 0)
    np.testing.assert_equal(expected, graph.shortest_path(distance="constant"))
示例#9
0
def test_exact_graph_callable_bandwidth():
    """Exact graph with a callable bandwidth must match a reference.

    Two callables are exercised: one returning the constant 2 and one
    returning the 10th percentile of each row of the distance matrix.
    """
    decay = 2
    knn = 5
    n_pca = 20
    thresh = 1e-4

    def constant_bandwidth(x):
        return 2

    def percentile_bandwidth(x):
        return np.percentile(x, 10, axis=1)

    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_nu = reducer.fit(data).transform(data)
    pdx = squareform(pdist(data_nu, metric="euclidean"))

    def reference_graph(bandwidth):
        # Kernel from the callable's bandwidth, truncated below thresh,
        # averaged with its transpose, diagonal cleared.
        kernel = np.exp(-1 * (pdx / bandwidth(pdx))**decay)
        kernel[kernel < thresh] = 0
        adjacency = (kernel + kernel.T) / 2
        np.fill_diagonal(adjacency, 0)
        return pygsp.graphs.Graph(adjacency)

    def built_graph(bandwidth):
        return build_graph(
            data,
            n_pca=n_pca,
            knn=knn - 1,
            decay=decay,
            bandwidth=bandwidth,
            random_state=42,
            thresh=thresh,
            use_pygsp=True,
        )

    # --- constant bandwidth: exact agreement expected ---
    G = reference_graph(constant_bandwidth)
    G2 = built_graph(constant_bandwidth)
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)
    assert G.N == G2.N
    np.testing.assert_equal(G.dw, G2.dw)
    assert (G2.W != G.W).sum() == 0
    assert (G.W != G2.W).nnz == 0

    # --- percentile bandwidth: agreement up to floating-point tolerance ---
    G = reference_graph(percentile_bandwidth)
    G2 = built_graph(percentile_bandwidth)
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)
    assert G.N == G2.N
    np.testing.assert_allclose(G.dw, G2.dw)
    np.testing.assert_allclose((G2.W - G.W).data, 0, atol=1e-14)
示例#10
0
def test_shortest_path_data():
    """``shortest_path(distance="data")`` and the default call must agree
    with ``graph_shortest_path`` on data distances masked by the kernel."""
    picked = np.random.choice(len(data), len(data) // 4, replace=False)
    graph = build_graph(data[picked], knn=5, decay=None)
    # Keep a pairwise distance only where the kernel has a positive entry.
    connected = np.where(graph.K.toarray() > 0, 1, 0)
    masked_distances = squareform(pdist(graph.data_nu)) * connected
    expected = graph_shortest_path(masked_distances)
    # The reference routine reports 0 where no path exists.
    expected[expected == 0] = np.inf
    # A node is at distance zero from itself.
    np.fill_diagonal(expected, 0)
    np.testing.assert_allclose(expected, graph.shortest_path(distance="data"))
    np.testing.assert_allclose(expected, graph.shortest_path())
示例#11
0
def test_shortest_path_affinity():
    """``shortest_path(distance="affinity")`` and the default call must agree
    with ``graph_shortest_path`` on negative log kernel values."""
    picked = np.random.choice(len(data), len(data) // 4, replace=False)
    graph = build_graph(data[picked], knn=5, decay=15)
    kernel = graph.K
    nonzero = kernel != 0
    # Take the log only of non-zero entries; zero entries stay zero.
    log_kernel = np.log(np.where(nonzero, kernel, np.nan))
    neg_log_affinity = -1 * np.where(nonzero, log_kernel, 0)
    expected = graph_shortest_path(neg_log_affinity)
    # The reference routine reports 0 where no path exists.
    expected[expected == 0] = np.inf
    # A node is at distance zero from itself.
    np.fill_diagonal(expected, 0)
    np.testing.assert_allclose(expected,
                               graph.shortest_path(distance="affinity"))
    np.testing.assert_allclose(expected, graph.shortest_path())
示例#12
0
def test_exact_graph_fixed_bandwidth():
    """Exact graph with a user-supplied bandwidth must match a reference.

    Checked once with a scalar bandwidth and once with one randomly drawn
    bandwidth per row of ``data``; no thresholding is applied.
    """
    decay = 2
    knn = None
    n_pca = 20
    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_nu = reducer.fit(data).transform(data)
    pdx = squareform(pdist(data_nu, metric="euclidean"))

    def reference_graph(kernel):
        # Average with the transpose and clear the diagonal.
        adjacency = (kernel + kernel.T) / 2
        np.fill_diagonal(adjacency, 0)
        return pygsp.graphs.Graph(adjacency)

    def built_graph(bandwidth):
        return build_graph(
            data,
            n_pca=n_pca,
            graphtype="exact",
            knn=knn,
            decay=decay,
            bandwidth=bandwidth,
            random_state=42,
            thresh=0,
            use_pygsp=True,
        )

    def assert_matches(expected, actual):
        assert isinstance(actual, graphtools.graphs.TraditionalGraph)
        assert expected.N == actual.N
        np.testing.assert_allclose(expected.dw, actual.dw)
        np.testing.assert_allclose((actual.W - expected.W).data, 0, atol=1e-14)

    # --- scalar bandwidth ---
    bandwidth = 2
    G = reference_graph(np.exp(-1 * (pdx / bandwidth)**decay))
    G2 = built_graph(bandwidth)
    assert_matches(G, G2)

    # --- one bandwidth per row ---
    bandwidth = np.random.gamma(5, 0.5, len(data))
    G = reference_graph(np.exp(-1 * (pdx / bandwidth[:, None])**decay))
    G2 = built_graph(bandwidth)
    assert_matches(G, G2)
示例#13
0
def test_truncated_exact_graph_no_pca():
    """Thresholded exact graph without PCA must match a reference.

    The same reference is compared against ``build_graph`` called with the
    dense sample and with its ``sp.csr_matrix`` conversion.
    """
    k = 3
    a = 13
    n_pca = None
    thresh = 1e-4
    picked = np.random.choice(len(data), len(data) // 10, replace=False)
    data_small = data[picked]

    pdx = squareform(pdist(data_small, metric="euclidean"))
    # Per-row bandwidth: largest of the k smallest distances.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    kernel = np.exp(-1 * (pdx / epsilon[:, None])**a)
    kernel[kernel < thresh] = 0
    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    G = pygsp.graphs.Graph(adjacency)

    for graph_input in (data_small, sp.csr_matrix(data_small)):
        G2 = build_graph(
            graph_input,
            thresh=thresh,
            graphtype="exact",
            n_pca=n_pca,
            decay=a,
            knn=k - 1,
            random_state=42,
            use_pygsp=True,
        )
        assert G.N == G2.N
        np.testing.assert_equal(G.dw, G2.dw)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)
示例#14
0
def test_knnmax():
    """Check the ``knn_max`` cap on the number of neighbours per row.

    First, with symmetrization disabled, every kernel row must hold exactly
    ``k_max`` positive entries. Then the symmetrized graph is compared with
    a reference whose kernel is zeroed beyond each row's ``k_max`` radius.
    """
    data = datasets.make_swiss_roll()[0]
    k = 5
    k_max = 10
    a = 0.45
    thresh = 0
    shared = dict(
        n_pca=None,
        decay=a,
        knn=k - 1,
        knn_max=k_max - 1,
        thresh=thresh,
        random_state=42,
    )

    with warnings.catch_warnings():
        # The kernel is left unsymmetrized here, so silence that warning.
        warnings.filterwarnings("ignore", "K should be symmetric",
                                RuntimeWarning)
        unsymmetrized = build_graph(data, kernel_symm=None, **shared)
        assert np.all((unsymmetrized.K > 0).sum(axis=1) == k_max)

    pdx = squareform(pdist(data, metric="euclidean"))
    # Per-row bandwidth: largest of the k smallest distances.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    # Per-row cut-off: largest of the k_max smallest distances.
    cutoff = np.partition(pdx, k_max, axis=1)[:, :k_max].max(axis=1)
    scaled = pdx / epsilon[:, None]
    kernel = np.where(pdx <= cutoff[:, None], np.exp(-1 * scaled**a), 0)
    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    G = pygsp.graphs.Graph(adjacency)
    G2 = build_graph(data, use_pygsp=True, **shared)

    assert isinstance(G2, graphtools.graphs.kNNGraph)
    assert G.N == G2.N
    assert np.all(G.dw == G2.dw)
    assert (G.W - G2.W).nnz == 0
示例#15
0
def test_knn_graph():
    """Binary kNN graph must equal a brute-force reference.

    Also checks ``build_kernel_to_data`` on the graph's own data: it must
    reproduce the kernel after symmetrization, be fully dense when ``knn``
    equals the number of samples, and warn when ``knn`` exceeds it.
    """
    k = 3
    n_pca = 20
    n_samples = data.shape[0]
    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_nu = reducer.fit(data).transform(data)
    pdx = squareform(pdist(data_nu, metric="euclidean"))
    # Radius per row: largest of the k smallest distances.
    epsilon = np.partition(pdx, k, axis=1)[:, :k].max(axis=1)
    # 1 where an entry lies within its row's radius, 0 elsewhere.
    kernel = np.where(pdx <= epsilon[:, None], 1.0, 0.0)

    adjacency = (kernel + kernel.T) / 2
    np.fill_diagonal(adjacency, 0)
    G = pygsp.graphs.Graph(adjacency)
    G2 = build_graph(
        data,
        n_pca=n_pca,
        decay=None,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_equal(G.dw, G2.dw)
    assert (G.W - G2.W).nnz == 0
    assert (G2.W - G.W).sum() == 0
    assert isinstance(G2, graphtools.graphs.kNNGraph)

    # Kernel from the graph to its own data, symmetrized by averaging.
    self_kernel = G2.build_kernel_to_data(G2.data_nu, knn=k)
    self_kernel = (self_kernel + self_kernel.T) / 2
    assert (G2.K - self_kernel).nnz == 0

    # With knn equal to the sample count every entry is stored.
    full_kernel = G2.build_kernel_to_data(G2.data_nu, knn=n_samples)
    assert full_kernel.nnz == n_samples * n_samples

    # Asking for more neighbours than samples must warn.
    template = ("Cannot set knn ({}) to be greater than "
                "n_samples ({}). Setting knn={}")
    expected_warning = template.format(n_samples + 1, n_samples, n_samples)
    with assert_warns_message(UserWarning, expected_warning):
        G2.build_kernel_to_data(
            Y=G2.data_nu,
            knn=n_samples + 1,
        )
示例#16
0
def test_exact_graph_anisotropy():
    """Check anisotropy normalization on an exact graph and its validation.

    The reference divides the symmetrized alpha-decay kernel by
    ``outer(d, d) ** anisotropy`` (``d`` = kernel row sums). Afterwards,
    anisotropy values of -1, 2 and "invalid" must each raise ``ValueError``
    with the expected message.
    """
    k = 3
    a = 13
    n_pca = 20
    anisotropy = 0.9
    picked = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[picked]
    # Keyword arguments shared by every build_graph call in this test.
    common = dict(
        thresh=0,
        n_pca=n_pca,
        decay=a,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
    )

    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_small_nu = reducer.fit(data_small).transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric="euclidean"))
    # Per-row bandwidth: largest of the k smallest distances.
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    weighted_pdx = (pdx.T / epsilon).T
    kernel = np.exp(-1 * weighted_pdx**a)
    kernel = kernel + kernel.T
    kernel = np.divide(kernel, 2)
    row_sums = kernel.sum(1)
    adjacency = kernel / (np.outer(row_sums, row_sums)**anisotropy)
    np.fill_diagonal(adjacency, 0)
    G = pygsp.graphs.Graph(adjacency)

    G2 = build_graph(data_small, anisotropy=anisotropy, **common)
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)
    assert G.N == G2.N
    np.testing.assert_equal(G.dw, G2.dw)
    assert (G2.W != G.W).sum() == 0
    assert (G.W != G2.W).nnz == 0

    # Out-of-range or non-numeric anisotropy values must be rejected.
    invalid_cases = (
        (-1, "Expected 0 <= anisotropy <= 1. Got -1"),
        (2, "Expected 0 <= anisotropy <= 1. Got 2"),
        ("invalid", "Expected 0 <= anisotropy <= 1. Got invalid"),
    )
    for bad_value, message in invalid_cases:
        with assert_raises_message(ValueError, message):
            build_graph(data_small, anisotropy=bad_value, **common)
示例#17
0
def test_exact_graph():
    """Exact graph must equal a reference for every supported input form.

    The same reference is compared against graphs built from raw data, a
    precomputed distance matrix, a precomputed affinity matrix (sparse and
    dense) and a precomputed adjacency matrix.
    """
    k = 3
    a = 13
    n_pca = 20
    bandwidth_scale = 1.3
    picked = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[picked]
    reducer = PCA(n_pca, svd_solver="randomized", random_state=42)
    data_small_nu = reducer.fit(data_small).transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric="euclidean"))
    # Per-row bandwidth: largest of the k smallest distances, scaled.
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k],
                     axis=1) * bandwidth_scale
    weighted_pdx = (pdx.T / epsilon).T
    K = np.exp(-1 * weighted_pdx**a)
    # K itself is reused below as a precomputed affinity, so keep it intact.
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    def assert_matches_reference(candidate):
        assert G.N == candidate.N
        np.testing.assert_equal(G.dw, candidate.dw)
        assert (G.W != candidate.W).nnz == 0
        assert (candidate.W != G.W).sum() == 0
        assert isinstance(candidate, graphtools.graphs.TraditionalGraph)

    # From raw data.
    assert_matches_reference(build_graph(
        data_small,
        thresh=0,
        n_pca=n_pca,
        decay=a,
        knn=k - 1,
        random_state=42,
        bandwidth_scale=bandwidth_scale,
        use_pygsp=True,
    ))
    # From a precomputed distance matrix.
    assert_matches_reference(build_graph(
        pdx,
        n_pca=None,
        precomputed="distance",
        bandwidth_scale=bandwidth_scale,
        decay=a,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
    ))
    # From a precomputed affinity matrix, sparse then dense.
    assert_matches_reference(build_graph(
        sp.coo_matrix(K),
        n_pca=None,
        precomputed="affinity",
        random_state=42,
        use_pygsp=True,
    ))
    assert_matches_reference(build_graph(
        K,
        n_pca=None,
        precomputed="affinity",
        random_state=42,
        use_pygsp=True,
    ))
    # From a precomputed adjacency matrix.
    assert_matches_reference(build_graph(
        W,
        n_pca=None,
        precomputed="adjacency",
        random_state=42,
        use_pygsp=True,
    ))
示例#18
0
def test_truncated_exact_graph_sparse():
    """Thresholded exact graph must match a reference for sparse inputs.

    Inputs tried: sparse raw data (COO), and precomputed distance (BSR),
    affinity (LIL) and adjacency (DOK) matrices.
    """
    k = 3
    a = 13
    n_pca = 20
    thresh = 1e-4
    picked = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[picked]
    svd = TruncatedSVD(n_pca, random_state=42).fit(data_small)
    data_small_nu = svd.transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric="euclidean"))
    # Per-row bandwidth: largest of the k smallest distances.
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    weighted_pdx = (pdx.T / epsilon).T
    K = np.exp(-1 * weighted_pdx**a)
    K[K < thresh] = 0
    # K itself is reused below as a precomputed affinity, so keep it intact.
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    def assert_identical(candidate):
        assert G.N == candidate.N
        np.testing.assert_equal(G.dw, candidate.dw)
        assert (G.W != candidate.W).nnz == 0
        assert (candidate.W != G.W).sum() == 0
        assert isinstance(candidate, graphtools.graphs.TraditionalGraph)

    # Sparse raw data: compared with a floating-point tolerance.
    G2 = build_graph(
        sp.coo_matrix(data_small),
        thresh=thresh,
        graphtype="exact",
        n_pca=n_pca,
        decay=a,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_allclose(G2.W.toarray(), G.W.toarray())
    assert isinstance(G2, graphtools.graphs.TraditionalGraph)

    # Precomputed inputs: exact agreement expected.
    assert_identical(build_graph(
        sp.bsr_matrix(pdx),
        n_pca=None,
        precomputed="distance",
        thresh=thresh,
        decay=a,
        knn=k - 1,
        random_state=42,
        use_pygsp=True,
    ))
    assert_identical(build_graph(
        sp.lil_matrix(K),
        n_pca=None,
        precomputed="affinity",
        thresh=thresh,
        random_state=42,
        use_pygsp=True,
    ))
    assert_identical(build_graph(
        sp.dok_matrix(W),
        n_pca=None,
        precomputed="adjacency",
        random_state=42,
        use_pygsp=True,
    ))
示例#19
0
def test_mnn_graph_decay():
    """Compare an MNN graph built with decay to a hand-built reference.

    The reference kernel is assembled one block per ordered pair of samples
    with an alpha-decay kernel, cross-sample blocks are rescaled row by row,
    and the result is symmetrized as
    ``theta * min(K, K.T) + (1 - theta) * max(K, K.T)``. It must equal the
    graph returned by ``graphtools.Graph`` with ``kernel_symm="mnn"``.
    """
    X, sample_idx = generate_swiss_roll()
    theta = 0.9
    k = 10
    a = 20
    metric = "euclidean"
    beta = 0.2
    samples = np.unique(sample_idx)

    # all-NaN frame; every block is overwritten in the loop below
    K = np.zeros((len(X), len(X)))
    K[:] = np.nan
    K = pd.DataFrame(K)

    for si in samples:
        X_i = X[sample_idx == si]  # get observations in sample i
        for sj in samples:
            # sorted-distance column used as bandwidth: k within a sample,
            # k - 1 across samples
            batch_k = k if si == sj else k - 1
            X_j = X[sample_idx == sj]  # get observation in sample j
            pdx_ij = cdist(X_i, X_j, metric=metric)  # pairwise distances
            kdx_ij = np.sort(pdx_ij, axis=1)  # row-wise sorted distances
            e_ij = kdx_ij[:, batch_k]  # bandwidth distance per row
            pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
            k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
            if si == sj:
                # within-sample block: averaged with its own transpose
                K.iloc[sample_idx == si, sample_idx == sj] = (k_ij + k_ij.T) / 2
            else:
                # cross-sample (off-diagonal) block: stored as computed
                K.iloc[sample_idx == si, sample_idx == sj] = k_ij

    # Rescale each cross-sample block row by row against the matching
    # within-sample block.
    Kn = K.copy()
    for i in samples:
        curr_K = K.iloc[sample_idx == i, sample_idx == i]
        # NOTE(review): `norm` is imported outside this block; with order 1
        # and axis=1 it presumably yields row-wise L1 norms — confirm.
        i_norm = norm(curr_K, 1, axis=1)
        for j in samples:
            if i == j:
                continue
            else:
                curr_K = K.iloc[sample_idx == i, sample_idx == j]
                curr_norm = norm(curr_K, 1, axis=1)
                # per-row factor: ratio of within- to cross-sample norm,
                # capped at 1, then multiplied by beta
                scale = np.minimum(1, i_norm / curr_norm) * beta
                Kn.iloc[sample_idx == i, sample_idx == j] = (
                    curr_K.values * scale[:, None]
                )

    K = Kn
    # theta-weighted blend of the entrywise min and max of K and K.T
    W = np.array((theta * np.minimum(K, K.T)) + ((1 - theta) * np.maximum(K, K.T)))
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)
    G2 = graphtools.Graph(
        X,
        knn=k,
        decay=a,
        beta=beta,
        kernel_symm="mnn",
        theta=theta,
        distance=metric,
        sample_idx=sample_idx,
        thresh=0,
        use_pygsp=True,
    )
    assert G.N == G2.N
    np.testing.assert_array_equal(G.dw, G2.dw)
    np.testing.assert_array_equal((G.W - G2.W).data, 0)
    assert isinstance(G2, graphtools.graphs.MNNGraph)
示例#20
0
def test_exact_graph():
    """Exact graph must equal a reference for every supported input form.

    Each entry of ``cases`` pairs an input (raw data, precomputed distance,
    sparse/dense precomputed affinity, precomputed adjacency) with the
    keyword arguments specific to it; all are checked against the same
    reference graph.
    """
    k = 3
    a = 13
    n_pca = 20
    picked = np.random.choice(len(data), len(data) // 2, replace=False)
    data_small = data[picked]
    reducer = PCA(n_pca, svd_solver='randomized', random_state=42)
    data_small_nu = reducer.fit(data_small).transform(data_small)
    pdx = squareform(pdist(data_small_nu, metric='euclidean'))
    # Per-row bandwidth: largest of the k smallest distances.
    epsilon = np.max(np.partition(pdx, k, axis=1)[:, :k], axis=1)
    weighted_pdx = (pdx.T / epsilon).T
    K = np.exp(-1 * weighted_pdx**a)
    # K itself is reused below as a precomputed affinity, so keep it intact.
    W = (K + K.T) / 2
    np.fill_diagonal(W, 0)
    G = pygsp.graphs.Graph(W)

    cases = [
        (data_small, dict(thresh=0, n_pca=n_pca, decay=a, knn=k)),
        (pdx, dict(n_pca=None, precomputed='distance', decay=a, knn=k)),
        (sp.coo_matrix(K), dict(n_pca=None, precomputed='affinity')),
        (K, dict(n_pca=None, precomputed='affinity')),
        (W, dict(n_pca=None, precomputed='adjacency')),
    ]
    for graph_input, options in cases:
        G2 = build_graph(graph_input,
                         random_state=42,
                         use_pygsp=True,
                         **options)
        assert G.N == G2.N
        assert np.all(G.d == G2.d)
        assert (G.W != G2.W).nnz == 0
        assert (G2.W != G.W).sum() == 0
        assert isinstance(G2, graphtools.graphs.TraditionalGraph)