Exemplo n.º 1
0
def test_pca(n_components, n_individual_components, multiview_output):
    """Check GroupPCA output shapes and fit/transform consistency.

    Three random views with 6, 4 and 5 features (100 samples each) are
    projected with ``GroupPCA``. The test verifies the shape of the
    transformed data for both output modes, then checks that
    ``fit_transform`` followed by ``transform`` yields matching results.

    The arguments are forwarded unchanged to the ``GroupPCA`` constructor;
    they are presumably supplied by a pytest parametrization that is not
    visible in this part of the file.
    """
    sample_count = 100
    view_widths = [6, 4, 5]
    random_gen = np.random.RandomState(0)
    views = []
    for width in view_widths:
        views.append(
            random_gen.multivariate_normal(
                np.zeros(width), np.eye(width), size=sample_count
            )
        )

    model = GroupPCA(
        n_components=n_components,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )

    def _assert_expected_shape(projected_block):
        # One row per sample; the column count is only pinned down when
        # n_components was given explicitly.
        assert projected_block.shape[0] == sample_count
        if n_components is not None:
            assert projected_block.shape[1] == n_components

    # Shape of fit(...).transform(...)
    projected = model.fit(views).transform(views)
    if multiview_output:
        assert len(projected) == 3
        for projected_block in projected:
            _assert_expected_shape(projected_block)
    else:
        _assert_expected_shape(projected)

    # fit_transform and a subsequent transform must agree
    via_fit_transform = model.fit_transform(views)
    via_transform = model.transform(views)
    assert_allclose(via_transform, via_fit_transform)
Exemplo n.º 2
0
def test_whitening(n_individual_components, prewhiten, multiview_output):
    """Check GroupPCA output with and without whitening.

    Low-rank, correlated data with strongly varying per-feature spread is
    split column-wise into three views. With ``whiten=True`` the test checks
    that ``fit_transform`` and ``transform`` agree, that the output has the
    expected shape, and (for single-array output) that every component has
    unit standard deviation. With ``whiten=False`` only the output shape is
    checked.

    The arguments are forwarded unchanged to the ``GroupPCA`` constructor;
    they are presumably supplied by a pytest parametrization that is not
    visible in this part of the file.
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80
    n_components = 30
    rank = 50
    n_views = 3

    # some low rank data with correlated features
    X = np.dot(
        rng.randn(n_samples, rank),
        np.dot(np.diag(np.linspace(10.0, 1.0, rank)),
               rng.randn(rank, n_features)),
    )
    # Scale the first 50 features by 3 so the per-feature spread is uneven.
    X[:, :50] *= 3
    assert X.shape == (n_samples, n_features)
    # the component-wise standard deviation is thus highly varying:
    assert X.std(axis=0).std() > 43.8
    Xs = np.array_split(X, n_views, axis=1)

    # Copy every view, not just the list: the split results share memory
    # with X, so a shallow list copy would not keep the original intact if
    # the estimator modified its input in place.
    Xs_ = [view.copy() for view in Xs]
    gpca = GroupPCA(
        n_components=n_components,
        whiten=True,
        prewhiten=prewhiten,
        random_state=0,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
    )
    # test fit_transform
    X_whitened = gpca.fit_transform(Xs_)
    X_whitened2 = gpca.transform(Xs_)
    assert_allclose(X_whitened, X_whitened2, rtol=5e-4)
    if multiview_output:
        assert len(X_whitened) == n_views
        for X_view in X_whitened:
            assert X_view.shape == (n_samples, n_components)
    else:
        assert X_whitened.shape == (n_samples, n_components)
        assert_allclose(X_whitened.std(ddof=1, axis=0), np.ones(n_components))

    # Fresh copies again, and fit/transform on the same copies.
    Xs_ = [view.copy() for view in Xs]
    gpca = GroupPCA(
        n_components=n_components,
        whiten=False,
        prewhiten=prewhiten,
        n_individual_components=n_individual_components,
        multiview_output=multiview_output,
        random_state=rng,
    ).fit(Xs_)
    X_unwhitened = gpca.transform(Xs_)
    if multiview_output:
        assert len(X_unwhitened) == n_views
        for X_view in X_unwhitened:
            assert X_view.shape == (n_samples, n_components)
    else:
        assert X_unwhitened.shape == (n_samples, n_components)
Exemplo n.º 3
0
def test_grouppca_deterministic_output():
    """Check that repeated GroupPCA fits give the same first output row.

    The same three random views are fitted 20 times with freshly constructed
    estimators. Every estimator receives the same ``RandomState`` instance,
    so (assuming the estimator draws from it) its state may differ between
    runs; the first transformed sample must nevertheless be identical in
    every run.
    """
    n_repeats = 20
    sample_count = 100
    random_gen = np.random.RandomState(0)
    views = [
        random_gen.multivariate_normal(
            np.zeros(width), np.eye(width), size=sample_count
        )
        for width in [6, 4, 5]
    ]

    first_rows = np.zeros((n_repeats, 2))
    for run in range(n_repeats):
        estimator = GroupPCA(
            n_components=2,
            n_individual_components=3,
            multiview_output=False,
            random_state=random_gen,
        )
        projected = estimator.fit_transform(views)
        # Keep only the first transformed sample of this run.
        first_rows[run, :] = projected[0]

    # Every run must reproduce the row obtained in the first run.
    reference = np.tile(first_rows[0, :], n_repeats).reshape(n_repeats, 2)
    assert_allclose(first_rows, reference)