def test_frob_norm(self):
    """
    Check that the Frobenius norm is calculated correctly whether the
    full or the partial PCA is computed.
    """
    true_frob_norm = np.linalg.norm(self.X_cent, ord='fro')

    PCA = pca(n_components=None).fit(self.X)
    self.assertTrue(np.allclose(PCA.frob_norm_, true_frob_norm))

    # TODO: this is failing, it could be a numerical issue.
    PCA = pca(n_components=3).fit(self.X)
    self.assertTrue(np.allclose(PCA.frob_norm_, true_frob_norm))
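
# A standalone sketch of the identity the test above relies on:
# ||A||_F equals the square root of the sum of squared singular values.
# If frob_norm_ were estimated from only the leading singular values of a
# partial PCA, it would underestimate the true norm, which may explain the
# TODO above. The helper name is illustrative, not part of the pca API.
def _frob_norm_from_svd(A, rank=None):
    sv = np.linalg.svd(np.asarray(A), compute_uv=False)
    if rank is not None:
        sv = sv[:rank]  # keep only the leading `rank` singular values
    return np.sqrt(np.sum(sv ** 2))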
def test_reconstruction(self):
    """
    The full-rank PCA reconstruction recovers the original data matrix
    exactly.
    """
    PCA = pca().fit(self.X)
    self.assertTrue(np.allclose(self.X, PCA.predict_reconstruction()))
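
# The expectation above written out with plain numpy rather than the pca
# class (a sketch of the identity being tested, not of pca's internals):
# a full-rank SVD of the mean-centered data reproduces it exactly, so
# adding the column means back recovers X.
def _full_rank_reconstruction(X):
    X = np.asarray(X, dtype=float)
    m = X.mean(axis=0)
    U, D, Vt = np.linalg.svd(X - m, full_matrices=False)
    return U @ np.diag(D) @ Vt + m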
def test_scores():
    X = np.array([[0, 1, 2],
                  [3, 4, 5],
                  [6, 7, 8],
                  [9, 10, 11]])
    pca2 = pca()
    pca2.fit(X)
    scores = np.array([[-7.794229, 0., -0.],
                       [-2.598076, 0., 0.],
                       [2.598076, 0., 0.],
                       [7.794229, -0., 0.]])
    assert_almost_equal(scores, pca2.scores(norm=False, np=True), decimal=4)
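
# Where the hard-coded scores above come from: after mean-centering, X has
# rank one with rows proportional to [1, 1, 1], so the only non-zero score
# column is X_cent @ v with v = [1, 1, 1] / sqrt(3), i.e.
# [-4.5, -1.5, 1.5, 4.5] * sqrt(3) ~= [-7.794, -2.598, 2.598, 7.794].
# A sketch of unnormalized SVD scores (the column signs of an SVD are
# arbitrary, so only the magnitudes are directly comparable); the helper is
# illustrative and not part of the pca API.
def _unnormalized_svd_scores(X):
    X = np.asarray(X, dtype=float)
    X_cent = X - X.mean(axis=0)
    U, D, _ = np.linalg.svd(X_cent, full_matrices=False)
    return U * D  # each score column scaled by its singular value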
def test_predict_reconstruction():
    X = np.array([[0, 1, 2],
                  [3, 4, 5],
                  [6, 7, 8],
                  [9, 10, 11]])
    pca2 = pca()
    pca2.fit(X)
    recon = pca2.predict_reconstruction(X)
    R = np.array([[-47.7852752, -46.7852752, -45.7852752],
                  [-12.9284251, -11.9284251, -10.9284251],
                  [21.9284251, 22.9284251, 23.9284251],
                  [56.7852752, 57.7852752, 58.7852752]])
    assert_almost_equal(R, recon, decimal=4)
def test_dunder_repr():
    """
    Make sure the __repr__ function works.
    """
    pca1 = pca(n_components=2, center='mean')
    compare = 'pca object, nothing has been computed yet'
    assert_equal(pca1.__repr__(), compare)

    X = np.array([[0, 1, 2],
                  [3, 4, 5],
                  [6, 7, 8]])
    pca1.fit(X)
    compare = 'Rank {} pca of a {} matrix'.format(2, X.shape)
    assert_equal(pca1.__repr__(), compare)
def test_centering(self):
    """
    Make sure PCA computes the correct centers. Also check
    center=False works correctly.
    """
    self.assertTrue(np.allclose(self.pca.m_, self.X.mean(axis=0)))

    # no centering
    PCA = pca(n_components=4, center=False).fit(self.X)
    self.assertTrue(PCA.m_ is None)

    Z = np.random.normal(size=(20, self.X.shape[1]))
    V = PCA.loadings_.values
    self.assertTrue(np.allclose(PCA.predict_scores(Z), np.dot(Z, V)))
def setUp(self):
    n = 100
    d = 20
    n_components = 10
    obs_names = ['sample_{}'.format(i) for i in range(n)]
    var_names = ['var_{}'.format(i) for i in range(d)]

    X = pd.DataFrame(np.random.normal(size=(n, d)),
                     index=obs_names, columns=var_names)
    X_cent = X - X.mean(axis=0)

    PCA = pca(n_components=n_components).fit(X)

    # store these for testing
    self.n = n
    self.d = d
    self.n_components = n_components
    self.obs_names = obs_names
    self.var_names = var_names
    self.X = X
    self.X_cent = X_cent
    self.pca = PCA
def test_n_components_None():
    pca1 = pca()
    X = np.array([[0, 1, 2],
                  [3, 4, 5],
                  [6, 7, 8],
                  [9, 10, 11]])
    pca1.fit(X)
    d = pca1.get_params()
    assert_equal(d['n_components'], X.shape[1])