Пример #1
0
    def testPCA(self):
        X = self.iris

        for n_comp in np.arange(X.shape[1]):
            pca = PCA(n_components=n_comp, svd_solver='full')

            X_r = pca.fit(X).transform(X).fetch()
            np.testing.assert_equal(X_r.shape[1], n_comp)

            X_r2 = pca.fit_transform(X).fetch()
            assert_array_almost_equal(X_r, X_r2)

            X_r = pca.transform(X).fetch()
            X_r2 = pca.fit_transform(X).fetch()
            assert_array_almost_equal(X_r, X_r2)

            # Test get_covariance and get_precision
            cov = pca.get_covariance()
            precision = pca.get_precision()
            assert_array_almost_equal(
                mt.dot(cov, precision).execute(), np.eye(X.shape[1]), 12)

        # test explained_variance_ratio_ == 1 with all components
        pca = PCA(svd_solver='full')
        pca.fit(X)
        np.testing.assert_allclose(
            pca.explained_variance_ratio_.sum().execute(), 1.0, 3)
Пример #2
0
    def test_singular_values(self):
        # Check that the PCA output has the correct singular values

        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 80

        X = mt.tensor(rng.randn(n_samples, n_features))

        pca = PCA(n_components=2, svd_solver='full', random_state=rng).fit(X)
        rpca = PCA(n_components=2, svd_solver='randomized',
                   random_state=rng).fit(X)
        assert_array_almost_equal(pca.singular_values_.fetch(),
                                  rpca.singular_values_.fetch(), 1)

        # Compare to the Frobenius norm
        X_pca = pca.transform(X)
        X_rpca = rpca.transform(X)
        assert_array_almost_equal(
            mt.sum(pca.singular_values_**2.0).execute(),
            (mt.linalg.norm(X_pca, "fro")**2.0).execute(), 12)
        assert_array_almost_equal(
            mt.sum(rpca.singular_values_**2.0).execute(),
            (mt.linalg.norm(X_rpca, "fro")**2.0).execute(), 0)

        # Compare to the 2-norms of the score vectors
        assert_array_almost_equal(
            pca.singular_values_.fetch(),
            mt.sqrt(mt.sum(X_pca**2.0, axis=0)).execute(), 12)
        assert_array_almost_equal(
            rpca.singular_values_.fetch(),
            mt.sqrt(mt.sum(X_rpca**2.0, axis=0)).execute(), 2)

        # Set the singular values and see what we get back
        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 110

        X = mt.tensor(rng.randn(n_samples, n_features))

        pca = PCA(n_components=3, svd_solver='full', random_state=rng)
        rpca = PCA(n_components=3, svd_solver='randomized', random_state=rng)
        X_pca = pca.fit_transform(X)

        X_pca /= mt.sqrt(mt.sum(X_pca**2.0, axis=0))
        X_pca[:, 0] *= 3.142
        X_pca[:, 1] *= 2.718

        X_hat = mt.dot(X_pca, pca.components_)
        pca.fit(X_hat)
        rpca.fit(X_hat)
        assert_array_almost_equal(pca.singular_values_.fetch(),
                                  [3.142, 2.718, 1.0], 14)
        assert_array_almost_equal(rpca.singular_values_.fetch(),
                                  [3.142, 2.718, 1.0], 14)
Пример #3
0
    def testWhitening(self):
        # Check that PCA output has unit-variance
        rng = np.random.RandomState(0)
        n_samples = 100
        n_features = 80
        n_components = 30
        rank = 50

        # some low rank data with correlated features
        X = mt.dot(
            rng.randn(n_samples, rank),
            mt.dot(mt.diag(mt.linspace(10.0, 1.0, rank)),
                   rng.randn(rank, n_features)))
        # the component-wise variance of the first 50 features is 3 times the
        # mean component-wise variance of the remaining 30 features
        X[:, :50] *= 3

        self.assertEqual(X.shape, (n_samples, n_features))

        # the component-wise variance is thus highly varying:
        self.assertGreater(X.std(axis=0).std().execute(), 43.8)

        for solver, copy in product(self.solver_list, (True, False)):
            # whiten the data while projecting to the lower dim subspace
            X_ = X.copy()  # make sure we keep an original across iterations.
            pca = PCA(n_components=n_components,
                      whiten=True,
                      copy=copy,
                      svd_solver=solver,
                      random_state=0,
                      iterated_power=7)
            # test fit_transform
            X_whitened = pca.fit_transform(X_.copy())
            self.assertEqual(X_whitened.shape, (n_samples, n_components))
            X_whitened2 = pca.transform(X_)
            assert_array_almost_equal(X_whitened.fetch(), X_whitened2.fetch())

            assert_almost_equal(X_whitened.std(ddof=1, axis=0).execute(),
                                np.ones(n_components),
                                decimal=6)
            assert_almost_equal(
                X_whitened.mean(axis=0).execute(), np.zeros(n_components))

            X_ = X.copy()
            pca = PCA(n_components=n_components,
                      whiten=False,
                      copy=copy,
                      svd_solver=solver).fit(X_)
            X_unwhitened = pca.transform(X_)
            self.assertEqual(X_unwhitened.shape, (n_samples, n_components))

            # in that case the output components still have varying variances
            assert_almost_equal(
                X_unwhitened.std(axis=0).std().execute(), 74.1, 1)
Пример #4
0
    def test_pca_deterministic_output(self):
        rng = np.random.RandomState(0)
        X = mt.tensor(rng.rand(10, 10))

        for solver in self.solver_list:
            transformed_X = mt.zeros((20, 2))
            for i in range(20):
                pca = PCA(n_components=2, svd_solver=solver, random_state=rng)
                transformed_X[i, :] = pca.fit_transform(X)[0]
            np.testing.assert_allclose(
                transformed_X.execute(), mt.tile(transformed_X[0, :], 20).reshape(20, 2).execute())
Пример #5
0
    def testPCARandomizedSolver(self):
        # PCA on dense arrays
        X = self.iris

        # Loop excluding the 0, invalid for randomized
        for n_comp in np.arange(1, X.shape[1]):
            pca = PCA(n_components=n_comp,
                      svd_solver='randomized',
                      random_state=0)

            X_r = pca.fit(X).transform(X)
            np.testing.assert_equal(X_r.shape[1], n_comp)

            X_r2 = pca.fit_transform(X)
            assert_array_almost_equal(X_r.fetch(), X_r2.fetch())

            X_r = pca.transform(X)
            assert_array_almost_equal(X_r.fetch(), X_r2.fetch())

            # Test get_covariance and get_precision
            cov = pca.get_covariance()
            precision = pca.get_precision()
            assert_array_almost_equal(
                mt.dot(cov, precision).execute(),
                mt.eye(X.shape[1]).execute(), 12)

        pca = PCA(n_components=0, svd_solver='randomized', random_state=0)
        with self.assertRaises(ValueError):
            pca.fit(X)

        pca = PCA(n_components=0, svd_solver='randomized', random_state=0)
        with self.assertRaises(ValueError):
            pca.fit(X)
        # Check internal state
        self.assertEqual(
            pca.n_components,
            PCA(n_components=0, svd_solver='randomized',
                random_state=0).n_components)
        self.assertEqual(
            pca.svd_solver,
            PCA(n_components=0, svd_solver='randomized',
                random_state=0).svd_solver)