Пример #1
0
def test_unregularized_multi():
    """Check that unregularized CCA variants agree on more than two views.

    rCCA, CCA_ALS, GCCA, MCCA and KCCA are fitted on the module-level views
    ``(X, Y, Z)``; each model's ``score`` output is then compared with the
    rCCA result using ``decimal=1``.
    """
    latent_dims = 2
    views = (X, Y, Z)
    cca = rCCA(latent_dims=latent_dims).fit(views)
    # Bound to ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(latent_dims=latent_dims,
                  stochastic=False,
                  tol=1e-12,
                  random_state=rng).fit(views)
    gcca = GCCA(latent_dims=latent_dims).fit(views)
    mcca = MCCA(latent_dims=latent_dims).fit(views)
    kcca = KCCA(latent_dims=latent_dims).fit(views)
    corr_cca = cca.score(views)
    corr_als = als.score(views)
    corr_gcca = gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kcca = kcca.score(views)
    # The np.testing helpers raise AssertionError on mismatch, so they are
    # called directly: wrapping them in ``assert ... is None`` adds nothing
    # and would skip the check entirely under ``python -O``.
    for corr_other in (corr_als, corr_mcca, corr_gcca, corr_kcca):
        np.testing.assert_array_almost_equal(corr_cca, corr_other, decimal=1)
Пример #2
0
def test_sparse_input():
    """Check that models fitted on ``(X_sp, Y_sp)`` give matching scores.

    Every model is fitted with ``centre=False`` on the module-level views
    ``(X_sp, Y_sp)`` (presumably sparse versions of ``X``/``Y`` -- they are
    defined outside this function) and scored on ``(X, Y)``.  The CCA_ALS
    score is the reference that MCCA, GCCA and KCCA are compared against
    with ``decimal=1``.
    """
    latent_dims = 2
    sparse_views = (X_sp, Y_sp)
    dense_views = (X, Y)
    cca = CCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    # Bound to ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(
        latent_dims=latent_dims,
        tol=1e-9,
        stochastic=False,
        centre=False,
        random_state=rng,
    ).fit(sparse_views)
    # PLS_ALS (here) and SCCA (below) are only fitted: the test checks that
    # fitting completes, their results are not compared with anything.
    PLS_ALS(latent_dims=latent_dims, tol=1e-9, centre=False).fit(sparse_views)
    gcca = GCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    mcca = MCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    kcca = KCCA(latent_dims=latent_dims, centre=False).fit(sparse_views)
    SCCA(latent_dims=latent_dims, centre=False, c=0.001).fit(sparse_views)
    # CCA is scored as well, but its result is not part of any comparison.
    cca.score(dense_views)
    corr_als = als.score(dense_views)
    corr_gcca = gcca.score(dense_views)
    corr_mcca = mcca.score(dense_views)
    corr_kcca = kcca.score(dense_views)
    # The np.testing helpers raise AssertionError on mismatch, so they are
    # called directly: wrapping them in ``assert ... is None`` adds nothing
    # and would skip the check entirely under ``python -O``.
    for corr_other in (corr_mcca, corr_gcca, corr_kcca):
        np.testing.assert_array_almost_equal(corr_als, corr_other, decimal=1)
Пример #3
0
def test_unregularized_methods():
    """Check that unregularized two-view CCA variants agree.

    CCA, CCA_ALS, GCCA, MCCA, KCCA, KGCCA and TCCA are fitted on the
    module-level views ``X`` and ``Y``.  The test then checks that

    * the CCA score matches CCA_ALS, MCCA, GCCA, KCCA and TCCA, and the
      KGCCA score matches GCCA (all with ``decimal=1``);
    * for CCA_ALS, CCA, MCCA and KCCA, the squared column norms of both
      transformed views are within 20% of the module-level ``n``.
    """
    latent_dims = 2
    cca = CCA(latent_dims=latent_dims).fit([X, Y])
    # Bound to ``als`` rather than ``iter`` so the builtin is not shadowed.
    als = CCA_ALS(latent_dims=latent_dims,
                  tol=1e-9,
                  stochastic=False,
                  random_state=rng).fit([X, Y])
    gcca = GCCA(latent_dims=latent_dims).fit([X, Y])
    mcca = MCCA(latent_dims=latent_dims, eps=1e-9).fit([X, Y])
    kcca = KCCA(latent_dims=latent_dims).fit([X, Y])
    kgcca = KGCCA(latent_dims=latent_dims).fit([X, Y])
    tcca = TCCA(latent_dims=latent_dims).fit([X, Y])

    views = (X, Y)
    corr_cca = cca.score(views)
    corr_als = als.score(views)
    corr_gcca = gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kcca = kcca.score(views)
    corr_kgcca = kgcca.score(views)
    corr_tcca = tcca.score(views)

    # The np.testing helpers raise AssertionError on mismatch, so they are
    # called directly: wrapping them in ``assert ... is None`` adds nothing
    # and would skip the check entirely under ``python -O``.
    for corr_other in (corr_als, corr_mcca, corr_gcca, corr_kcca, corr_tcca):
        np.testing.assert_array_almost_equal(corr_cca, corr_other, decimal=1)
    np.testing.assert_array_almost_equal(corr_kgcca, corr_gcca, decimal=1)

    # Check standardized models have standard outputs: the squared norm of
    # every transformed column should be close to ``n``.  The loop order
    # (view first, then model) mirrors the original sequence of checks.
    for view_index in (0, 1):
        for model in (als, cca, mcca, kcca):
            projected = model.transform(views)[view_index]
            np.testing.assert_allclose(
                np.linalg.norm(projected, axis=0) ** 2, n, rtol=0.2)
Пример #4
0
 def test_cv_fit(self):
     """Smoke-test ``gridsearch_fit`` on GCCA (plain and view-weighted) and MCCA.

     No assertions are made; the test passes when all three grid searches
     over the ``c`` candidates complete without raising.
     """
     latent_dims = 5
     reg_values = [0.1, 0.2]
     # Every ordered pair of candidate values, one entry per view.
     param_candidates = {'c': list(itertools.product(reg_values, repeat=2))}
     search_kwargs = dict(folds=2, param_candidates=param_candidates)

     # Only the first search asks for a plot.
     unweighted_search = GCCA(latent_dims=latent_dims).gridsearch_fit(
         self.X, self.Y, plot=True, **search_kwargs)
     deweighted_search = GCCA(
         latent_dims=latent_dims,
         view_weights=[0.5, 0.5],
     ).gridsearch_fit(self.X, self.Y, **search_kwargs)
     multiset_search = MCCA(latent_dims=latent_dims).gridsearch_fit(
         self.X, self.Y, **search_kwargs)
Пример #5
0
 def test_weighted_GCCA_methods(self):
     """Check that equal view weights do not change the GCCA train correlation.

     GCCA is fitted with ``c=[0, 0]`` once without ``view_weights`` and once
     with ``view_weights=[0.5, 0.5]``; the ``[0, 1]`` entries of the two
     ``train_correlations`` are compared with ``decimal=1``.  A third fit
     that passes a mask ``K`` is only required to complete.
     """
     latent_dims = 5
     c = 0
     wrap_unweighted_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit(self.X, self.Y)
     wrap_deweighted_gcca = GCCA(latent_dims=latent_dims, c=[c, c], view_weights=[0.5, 0.5]).fit(
         self.X, self.Y)
     corr_unweighted_gcca = wrap_unweighted_gcca.train_correlations[0, 1]
     corr_deweighted_gcca = wrap_deweighted_gcca.train_correlations[0, 1]
     # Mask of ones with row 0 zeroed from column 200 onwards -- presumably
     # marks those samples of the first view as unobserved (confirm against
     # the ``K`` argument of ``GCCA.fit``).  The fit result is not inspected.
     K = np.ones((2, self.X.shape[0]))
     K[0, 200:] = 0
     GCCA(latent_dims=latent_dims, c=[c, c]).fit(self.X, self.Y, K=K)
     # np.testing raises AssertionError on mismatch and always returns None,
     # so an ``assertIsNone`` wrapper around it is redundant.
     np.testing.assert_array_almost_equal(corr_unweighted_gcca, corr_deweighted_gcca, decimal=1)
Пример #6
0
def test_regularized_methods():
    """Check that regularized linear methods with ``c=1`` match PLS.

    The test treats ``c=1`` as maximum regularisation.  Linear-kernel KCCA,
    MCCA and rCCA are fitted with ``c=[1, 1]`` on the module-level views
    ``X`` and ``Y`` and their scores are compared with the PLS score using
    ``decimal=1``.  GCCA is fitted and scored too, but its score is not
    compared with anything.
    """
    latent_dims = 2
    c = 1
    kernel = KCCA(latent_dims=latent_dims,
                  c=[c, c],
                  kernel=["linear", "linear"]).fit((X, Y))
    pls = PLS(latent_dims=latent_dims).fit([X, Y])
    gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    mcca = MCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    rcca = rCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])

    views = (X, Y)
    # NOTE: the GCCA score is computed but deliberately left out of the
    # comparisons below, exactly as in the original test.
    gcca.score(views)
    corr_mcca = mcca.score(views)
    corr_kernel = kernel.score(views)
    corr_pls = pls.score(views)
    corr_rcca = rcca.score(views)

    # Check the correlations from each regularized method match PLS.  The
    # np.testing helpers raise AssertionError themselves, so they are called
    # directly: wrapping them in ``assert ... is None`` adds nothing and
    # would skip the check entirely under ``python -O``.
    for corr_other in (corr_mcca, corr_kernel, corr_rcca):
        np.testing.assert_array_almost_equal(corr_pls, corr_other, decimal=1)
Пример #7
0
def test_weighted_GCCA_methods():
    """Check that equal view weights do not change the GCCA score.

    GCCA is fitted with ``c=[0, 0]`` on the module-level views ``X`` and
    ``Y``, once without ``view_weights`` and once with
    ``view_weights=[0.5, 0.5]``; the two scores are compared with
    ``decimal=1``.  A third fit that passes a mask ``K`` is only required
    to complete.
    """
    # TODO we have view weighted GCCA and missing observation GCCA
    latent_dims = 2
    c = 0
    unweighted_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit([X, Y])
    deweighted_gcca = GCCA(latent_dims=latent_dims,
                           c=[c, c],
                           view_weights=[0.5, 0.5]).fit([X, Y])
    corr_unweighted_gcca = unweighted_gcca.score((X, Y))
    corr_deweighted_gcca = deweighted_gcca.score((X, Y))
    # Mask of ones with row 0 zeroed from column 200 onwards -- presumably
    # marks those samples of the first view as unobserved (confirm against
    # the ``K`` argument of ``GCCA.fit``).  The fit result is not inspected.
    K = np.ones((2, X.shape[0]))
    K[0, 200:] = 0
    GCCA(latent_dims=latent_dims, c=[c, c]).fit((X, Y), K=K)
    # Check the weighted and unweighted correlations are the same.  The
    # np.testing helper raises AssertionError itself, so it is called
    # directly: wrapping it in ``assert ... is None`` adds nothing and would
    # skip the check entirely under ``python -O``.
    np.testing.assert_array_almost_equal(
        corr_unweighted_gcca, corr_deweighted_gcca, decimal=1)
Пример #8
0
 def test_regularized_methods(self):
     """Check that regularized linear methods with ``c=1`` match PLS.

     The test treats ``c=1`` as maximum regularisation.  The ``[0, 1]``
     entries of ``train_correlations`` from linear-kernel KCCA, MCCA and
     rCCA are compared with the PLS value using ``decimal=1``.
     """
     latent_dims = 5
     c = 1
     wrap_kernel = KCCA(latent_dims=latent_dims, c=[c, c], kernel=['linear', 'linear']).fit(self.X,
                                                                                            self.Y)
     wrap_pls = PLS(latent_dims=latent_dims).fit(self.X, self.Y)
     wrap_gcca = GCCA(latent_dims=latent_dims, c=[c, c]).fit(self.X, self.Y)
     wrap_mcca = MCCA(latent_dims=latent_dims, c=[c, c]).fit(self.X, self.Y)
     wrap_rCCA = rCCA(latent_dims=latent_dims, c=[c, c]).fit(self.X, self.Y)
     # NOTE: the GCCA correlation is still read, but its comparison against
     # PLS was already disabled in the original test and stays disabled.
     wrap_gcca.train_correlations[0, 1]
     corr_mcca = wrap_mcca.train_correlations[0, 1]
     corr_kernel = wrap_kernel.train_correlations[0, 1]
     corr_pls = wrap_pls.train_correlations[0, 1]
     corr_rcca = wrap_rCCA.train_correlations[0, 1]
     # Check the correlations from each regularized method match PLS.
     # np.testing raises AssertionError on mismatch and always returns None,
     # so an ``assertIsNone`` wrapper around it is redundant.
     for corr_other in (corr_mcca, corr_kernel, corr_rcca):
         np.testing.assert_array_almost_equal(corr_pls, corr_other, decimal=1)
Пример #9
0
 def test_unregularized_methods(self):
     """Check that unregularized two-view CCA variants agree.

     CCA, CCA_ALS, GCCA, MCCA and KCCA are fitted on ``self.X`` and
     ``self.Y``.  The test checks the shape of the first ``score_list``
     entry for every model except plain CCA, then compares the ``[0, 1]``
     entries of ``train_correlations`` with ``decimal=2``.
     """
     latent_dims = 1
     wrap_cca = CCA(latent_dims=latent_dims).fit(self.X, self.Y)
     wrap_iter = CCA_ALS(latent_dims=latent_dims, tol=1e-9).fit(self.X, self.Y)
     wrap_gcca = GCCA(latent_dims=latent_dims).fit(self.X, self.Y)
     wrap_mcca = MCCA(latent_dims=latent_dims).fit(self.X, self.Y)
     wrap_kcca = KCCA(latent_dims=latent_dims).fit(self.X, self.Y)
     corr_cca = wrap_cca.train_correlations[0, 1]
     corr_iter = wrap_iter.train_correlations[0, 1]
     corr_gcca = wrap_gcca.train_correlations[0, 1]
     corr_mcca = wrap_mcca.train_correlations[0, 1]
     corr_kcca = wrap_kcca.train_correlations[0, 1]
     # Check the score outputs are the right shape.  assertEqual reports
     # both shapes on failure, unlike assertTrue(a == b).
     expected_shape = (self.X.shape[0], latent_dims)
     for model in (wrap_iter, wrap_gcca, wrap_mcca, wrap_kcca):
         self.assertEqual(model.score_list[0].shape, expected_shape)
     # Check the correlations from each unregularized method are the same.
     # np.testing raises AssertionError on mismatch and always returns None,
     # so an ``assertIsNone`` wrapper around it is redundant.
     np.testing.assert_array_almost_equal(corr_cca, corr_iter, decimal=2)
     for corr_other in (corr_mcca, corr_gcca, corr_kcca):
         np.testing.assert_array_almost_equal(corr_iter, corr_other, decimal=2)
Пример #10
0
latent_dims = 1
cv = 3

# Generate three views (X, Y, Z) with p, q and r features respectively.
# Every model below is fitted and scored on these same three views; the
# earlier version fitted on (X, Y, X), reusing X as the third training view
# while still scoring on (X, Y, Z).
(X, Y, Z), (tx, ty, tz) = generate_covariance_data(
    n, view_features=[p, q, r], latent_dims=latent_dims, correlation=[0.9]
)

# %%
# Eigendecomposition-Based Methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# %%
mcca = MCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
gcca = GCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
# We can also use kernel versions of these methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# %%
kcca = KCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
kgcca = KGCCA(latent_dims=latent_dims).fit((X, Y, Z)).score((X, Y, Z))

# %%
# Higher order correlation methods
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Пример #11
0
### Linear CCA via alternating least squares (can pass more than 2 views)
"""

# %%
# Fit plain CCA on the two training views.
# NOTE(review): the markdown heading for this cell mentions "alternating
# least squares", but the class instantiated here is ``CCA`` -- confirm the
# heading text.
linear_cca = CCA(latent_dims=latent_dims)

linear_cca.fit(train_view_1, train_view_2)

# Pair the train-time value with the one predicted on the test views; the
# ``[0, 1]`` index presumably selects the view-1 / view-2 entry (confirm).
linear_cca_results = np.stack((linear_cca.train_correlations[0, 1],
                               linear_cca.predict_corr(test_view_1,
                                                       test_view_2)[0, 1]))
"""
### (Regularized) Generalized CCA via alternating least squares (can pass more than 2 views)
"""

# GCCA with ``c=[1, 1]``, one entry per view.
gcca = GCCA(latent_dims=latent_dims, c=[1, 1])

gcca.fit(train_view_1, train_view_2)

# Same train/test pairing as for ``linear_cca_results``.
gcca_results = np.stack(
    (gcca.train_correlations[0, 1], gcca.predict_corr(test_view_1,
                                                      test_view_2)[0, 1]))
"""
### (Regularized) Multiset CCA via alternating least squares (can pass more than 2 views)
"""

mcca = MCCA(latent_dims=latent_dims, c=[0.5, 0.5])
# small amount of regularisation added since data is not full rank

mcca.fit(train_view_1, train_view_2)