Пример #1
0
    def test_converts_input_domain_if_needed(self):
        # Smoke test: fit a projector on the full dataset, then apply it to a
        # one-row table whose domain holds only the first five features.
        # There is no assertion; the test passes if the call does not raise.
        self.create_normal_dataset()
        fitted = linear.Pca(variance_covered=.99)(self.dataset)

        reduced_domain = data.Domain(self.dataset.domain.features[:5])
        single_row = data.Table(reduced_domain, [[1., 2., 3., 4., 5.]])

        fitted(single_row)
Пример #2
0
    def test_pca_with_max_components(self):
        # The dataset is requested with ncomponents three above the cap, and
        # the projection is expected to have exactly `limit` rows.
        limit = 3
        self.create_dataset(ncomponents=limit + 3)

        fitted = linear.Pca(max_components=limit)(self.dataset)

        # Only the first axis of the 2-D shape is checked.
        n_rows, _ = fitted.projection.shape
        self.assertEqual(n_rows, limit)
Пример #3
0
    def test_pca_with_variance_covered(self):
        # With variance_covered=.99 the projection is expected to have as many
        # rows as the ncomponents value the dataset was created with.
        expected_rows = 3
        self.create_dataset(ncomponents=expected_rows)

        fitted = linear.Pca(variance_covered=.99)(self.dataset)

        # Only the first axis of the 2-D shape is checked.
        n_rows, _ = fitted.projection.shape
        self.assertEqual(n_rows, expected_rows)
Пример #4
0
    def test_projected_domain_can_convert_data_without_class(self):
        # Projecting through the fitted projector and converting the original
        # table into the projected domain must give the same items.
        self.create_normal_dataset()
        fitted = linear.Pca(variance_covered=.99)(self.dataset)

        via_projector = fitted(self.dataset)
        target_domain = via_projector.domain
        via_domain = data.Table(target_domain, self.dataset)

        self.assertItemsEqual(via_projector, via_domain)
Пример #5
0
    def test_total_variance_remains_the_same(self):
        # For each dataset size, the reported variance_sum must match both the
        # sum of the per-component variances and the count of non-zero entries
        # in the fixture's principal_components.
        for size in (10, 250):
            self.create_dataset(m=size)

            fitted = linear.Pca()(self.dataset)

            total = fitted.variances.sum()
            self.assertAlmostEqual(fitted.variance_sum, total)

            non_zero_count = (self.principal_components != 0).sum()
            self.assertAlmostEqual(fitted.variance_sum, non_zero_count)
Пример #6
0
    def test_pca_with_standardization(self):
        # With standardize=True, all non-zero entries of the first projection
        # vector are expected to be (almost) equal.
        self.create_dataset(ncomponents=1)

        fitted = linear.Pca(standardize=True)(self.dataset)
        first_vector = fitted.projection[0]
        loadings = first_vector[first_vector.nonzero()]

        # Values in all dimensions are normally distributed, so every
        # dimension should be treated as equally important: min == max.
        smallest = loadings.min()
        largest = loadings.max()
        self.assertAlmostEqual(smallest, largest)
Пример #7
0
    def test_pca(self):
        # For each dataset size, check three properties of an unstandardized
        # fit: the significant vectors pass assertInCorrectSpace, every vector
        # has unit length, and variances are sorted in descending order.
        for size in (10, 250):
            self.create_dataset(m=size)

            fitted = linear.Pca(standardize=False)(self.dataset)

            # Only rows whose variance exceeds 0.01 are checked.
            vectors = fitted.projection
            self.assertInCorrectSpace(vectors[fitted.variances > 0.01, :])

            # Projection vectors should be normalized.
            for vector in fitted.projection:
                self.assertAlmostEqual(np.linalg.norm(vector), 1.)

            # Components should have decreasing variances.
            observed = fitted.variances.tolist()
            descending = sorted(fitted.variances, reverse=True)
            self.assertListEqual(observed, descending)
Пример #8
0
    def test_projected_domain_contains_class_vars(self):
        # Projection must carry class information through unchanged: the
        # projected domain contains the class variable(s), and every projected
        # example keeps the class value(s) of its source example.
        self.create_dataset_with_classes()

        projector = linear.Pca(variance_covered=.99)(self.dataset)
        projected_data = projector(self.dataset)

        self.assertIn(self.dataset.domain.class_var, projected_data.domain)
        for class_ in self.dataset.domain.class_vars:
            self.assertIn(class_, projected_data.domain)
        for ex1, ex2 in zip(self.dataset, projected_data):
            self.assertEqual(ex1.get_class(), ex2.get_class())
            # Compare the original value against the projected one; comparing
            # a value with itself would make this check vacuous.
            for v1, v2 in zip(ex1.get_classes(), ex2.get_classes()):
                self.assertEqual(v1, v2)
Пример #9
0
    def test_projects_data_table(self):
        # Smoke test: a projector fitted on the dataset is applied back to the
        # same dataset. There is no assertion; the call just must not raise.
        self.create_normal_dataset()
        learner = linear.Pca(variance_covered=.99)
        fitted = learner(self.dataset)

        fitted(self.dataset)
Пример #10
0
    def test_pca_on_only_constant_features(self):
        # Building and fitting a PCA on the constant-feature fixture is
        # expected to raise ValueError.
        self.create_constant_dataset()

        with self.assertRaises(ValueError):
            learner = linear.Pca()
            learner(self.dataset)
Пример #11
0
    def test_pca_on_empty_data(self):
        # Building and fitting a PCA on the empty-dataset fixture is expected
        # to raise ValueError.
        self.create_empty_dataset()

        with self.assertRaises(ValueError):
            learner = linear.Pca()
            learner(self.dataset)
Пример #12
0
    def test_pca_handles_unknowns(self):
        # Smoke test: fitting on the fixture with unknown values must not
        # raise. There is no assertion on the result.
        self.create_dataset_with_unknowns()

        learner = linear.Pca()
        learner(self.dataset)