Example #1
    def test_from_numpy_names(self):
        d = Domain.from_numpy(np.zeros((1, 5)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {}".format(i) for i in range(1, 6)])

        d = Domain.from_numpy(np.zeros((1, 99)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {:02}".format(i) for i in range(1, 100)])

        d = Domain.from_numpy(np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {:03}".format(i) for i in range(1, 101)])

        d = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(d.attributes[0].name, "Feature")

        d = Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)),
                              np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {}".format(i) for i in range(1, 4)])
        self.assertEqual(d.class_var.name, "Target")
        self.assertEqual([var.name for var in d.metas],
                         ["Meta {:03}".format(i) for i in range(1, 101)])
Example #2
    def test_do_not_recluster_on_same_data(self):
        """Do not recluster data points when targets or metas change."""

        # Prepare some dummy data
        x = np.eye(5)
        y1, y2 = np.ones((5, 1)), np.ones((5, 2))
        meta1, meta2 = np.ones((5, 1)), np.ones((5, 2))

        table1 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y1, metas=meta1),
            X=x, Y=y1, metas=meta1,
        )
        # X is same, should not cause update
        table2 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y2, metas=meta2),
            X=x, Y=y2, metas=meta2,
        )
        # X is different, should cause update
        table3 = table1.copy()
        table3.X[:, 0] = 1

        with patch.object(self.widget, '_invalidate_output') as commit:
            self.send_signal(self.widget.Inputs.data, table1)
            self.commit_and_wait()
            call_count = commit.call_count

            # Sending data with same X should not recompute the clustering
            self.send_signal(self.widget.Inputs.data, table2)
            self.commit_and_wait()
            self.assertEqual(call_count, commit.call_count)

            # Sending data with different X should recompute the clustering
            self.send_signal(self.widget.Inputs.data, table3)
            self.commit_and_wait()
            self.assertEqual(call_count + 1, commit.call_count)
Example #3
    def test_do_not_recluster_on_same_data(self):
        """Do not recluster data points when targets or metas change."""

        # Prepare some dummy data
        x = np.eye(5)
        y1, y2 = np.ones((5, 1)), np.ones((5, 2))
        meta1, meta2 = np.ones((5, 1)), np.ones((5, 2))

        table1 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y1, metas=meta1),
            X=x, Y=y1, metas=meta1,
        )
        # X is same, should not cause update
        table2 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y2, metas=meta2),
            X=x, Y=y2, metas=meta2,
        )
        # X is different, should cause update
        table3 = table1.copy()
        table3.X[:, 0] = 1

        with patch.object(self.widget, 'commit') as commit:
            self.send_signal(self.widget.Inputs.data, table1)
            self.commit_and_wait()
            call_count = commit.call_count

            # Sending data with same X should not recompute the clustering
            self.send_signal(self.widget.Inputs.data, table2)
            self.commit_and_wait()
            self.assertEqual(call_count, commit.call_count)

            # Sending data with different X should recompute the clustering
            self.send_signal(self.widget.Inputs.data, table3)
            self.commit_and_wait()
            self.assertEqual(call_count + 1, commit.call_count)
Example #4
    def test_do_not_recluster_on_same_data(self):
        """Do not recluster data points when targets or metas change."""

        # Prepare some dummy data
        x = np.eye(5)
        y1, y2 = np.ones((5, 1)), np.ones((5, 2))
        meta1, meta2 = np.ones((5, 1)), np.ones((5, 2))

        table1 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y1, metas=meta1),
            X=x, Y=y1, metas=meta1,
        )
        # X is same, should not cause update
        table2 = Table.from_numpy(
            domain=Domain.from_numpy(X=x, Y=y2, metas=meta2),
            X=x, Y=y2, metas=meta2,
        )
        # X is different, should cause update
        table3 = table1.copy()
        with table3.unlocked():
            table3.X[:, 0] = 1

        with patch.object(self.widget, 'unconditional_commit') as commit:
            self.send_signal(self.widget.Inputs.data, table1)
            self.commit_and_wait()
            commit.reset_mock()

            # Sending data with same X should not recompute the clustering
            self.send_signal(self.widget.Inputs.data, table2)
            commit.assert_not_called()

            # Sending data with different X should recompute the clustering
            self.send_signal(self.widget.Inputs.data, table3)
            commit.assert_called_once()
Example #5
    def test_from_numpy_names(self):
        for n_cols, name in [
            (5, "Feature {}"),
            (99, "Feature {:02}"),
            (100, "Feature {:03}"),
        ]:
            d = Domain.from_numpy(np.zeros((1, n_cols)))
            self.assertTrue(d.anonymous)
            self.assertEqual(
                [var.name for var in d.attributes],
                [name.format(i) for i in range(1, n_cols + 1)],
            )

        d = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(d.attributes[0].name, "Feature")

        d = Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)),
                              np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual(
            [var.name for var in d.attributes],
            ["Feature {}".format(i) for i in range(1, 4)],
        )
        self.assertEqual(d.class_var.name, "Target")
        self.assertEqual(
            [var.name for var in d.metas],
            ["Meta {:03}".format(i) for i in range(1, 101)],
        )
Example #6
    def test_from_numpy_names(self):
        d = Domain.from_numpy(np.zeros((1, 5)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {}".format(i) for i in range(1, 6)])

        d = Domain.from_numpy(np.zeros((1, 99)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {:02}".format(i) for i in range(1, 100)])

        d = Domain.from_numpy(np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {:03}".format(i) for i in range(1, 101)])

        d = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(d.attributes[0].name, "Feature")

        d = Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)),
                              np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {}".format(i) for i in range(1, 4)])
        self.assertEqual(d.class_var.name, "Target")
        self.assertEqual([var.name for var in d.metas],
                         ["Meta {:03}".format(i) for i in range(1, 101)])
Example #7
    def test_from_numpy_dimensions(self):
        d = Domain.from_numpy(np.zeros((1, 1)), np.zeros(5))
        self.assertTrue(d.anonymous)
        self.assertEqual(len(d.class_vars), 1)

        d = Domain.from_numpy(np.zeros((1, 1)), np.zeros((5, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(len(d.class_vars), 1)

        self.assertRaises(ValueError, Domain.from_numpy, np.zeros(2))
        self.assertRaises(ValueError, Domain.from_numpy, np.zeros((2, 2, 2)))
        self.assertRaises(ValueError, Domain.from_numpy, np.zeros((2, 2)), np.zeros((2, 2, 2)))
Example #8
    def test_from_numpy_dimensions(self):
        d = Domain.from_numpy(np.zeros((1, 1)), np.zeros(5))
        self.assertTrue(d.anonymous)
        self.assertEqual(len(d.class_vars), 1)

        d = Domain.from_numpy(np.zeros((1, 1)), np.zeros((5, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(len(d.class_vars), 1)

        self.assertRaises(ValueError, Domain.from_numpy, np.zeros(2))
        self.assertRaises(ValueError, Domain.from_numpy, np.zeros((2, 2, 2)))
        self.assertRaises(ValueError, Domain.from_numpy, np.zeros((2, 2)), np.zeros((2, 2, 2)))
Example #9
    def test_from_numpy_values(self):
        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(1, 3).reshape(2, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, ContinuousVariable)

        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(2).reshape(2, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, DiscreteVariable)
        self.assertEqual(d.class_var.values, ["v{}".format(i)
                                              for i in range(1, 3)])

        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(18, 23).reshape(5, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, ContinuousVariable)
Example #10
    def test_from_numpy_values(self):
        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(1, 3).reshape(2, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, ContinuousVariable)

        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(2).reshape(2, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, DiscreteVariable)
        self.assertEqual(d.class_var.values, ["v{}".format(i)
                                              for i in range(1, 3)])

        d = Domain.from_numpy(np.zeros((1, 1)), np.arange(18, 23).reshape(5, 1))
        self.assertTrue(d.anonymous)
        self.assertIsInstance(d.class_var, ContinuousVariable)
Example #11
def run_cn2(Xtr, Ytr, Xt, Yt, lb, k=None, log=None):
    domainx = Domain.from_numpy(Xtr.values)
    domainy = Domain.from_numpy(Ytr.values.reshape((-1, 1)))
    datax = Orange.data.Table.from_numpy(domainx, Xtr.values)
    datay = Orange.data.Table.from_numpy(domainy, Ytr.values.reshape((-1, 1)))
    discretizer = Orange.preprocess.DomainDiscretizer()
    domainx = discretizer(datax)
    domainy = discretizer(datay)
    domain = Domain(domainx.attributes, domainy.attributes[0])
    data = Orange.data.Table.from_numpy(domain, Xtr.values, Y=Ytr.values)

    learner = Orange.classification.CN2UnorderedLearner()
    #learner = Orange.classification.rules.CN2Learner()
    learner.rule_finder.search_algorithm.beam_width = 10
    learner.rule_finder.search_strategy.constrain_continuous = True
    learner.rule_finder.general_validator.min_covered_examples = 15
    cn2 = learner(data)

    if k is not None:
        r_def = cn2.rule_list[-1]
        cn2.rule_list = cn2.rule_list[:k]
        cn2.rule_list.append(r_def)

    Y_pred = np.argmax(cn2.predict(Xt.values), axis=1)

    ids = np.arange(Xtr.shape[0])
    print('default:', cn2.rule_list[-1].prediction)
    # Skip the last default rule
    for i, r in enumerate(cn2.rule_list[:-1]):
        cov = np.array([r.evaluate_instance(x) for x in data])
        pred = np.array([r.prediction] * sum(cov))
        acc = pred == Ytr.values[cov]
        r.covered = set(ids[cov])
        print(
            'CN2', '#{}, label:{}, len:{}, cov:{}, acc:{}'.format(
                i, r.prediction, r.length,
                sum(cov) / len(ids),
                sum(acc) / sum(cov)))

    if log is None:
        from logger import log
    log('cn2-k', len(cn2.rule_list[:-1]))
    for i, r in enumerate(cn2.rule_list[:-1]):
        log('cn2-nconds', r.length, i)
    log('cn2-auc', roc_auc_score(lb.transform(Yt.values),
                                 lb.transform(Y_pred)))
    log('cn2-bacc', balanced_accuracy_score(Yt, Y_pred))
    log('cn2-disp', dispersion_(cn2.rule_list[:-1], average=True))
    log('cn2-overlap', overlap(cn2.rule_list[:-1]))
    print(confusion_matrix(Yt, Y_pred))
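run_cn2 above mixes Orange's CN2 rule induction with project-specific helpers (dispersion_, overlap, the logger module) that are not part of Orange and are not shown here. The core Orange pattern it relies on can be sketched separately; a minimal sketch assuming a stock Orange installation and its bundled titanic dataset:

from Orange.data import Table
from Orange.classification import CN2UnorderedLearner

data = Table("titanic")                      # discrete attributes and class
learner = CN2UnorderedLearner()
learner.rule_finder.general_validator.min_covered_examples = 15
classifier = learner(data)

# Each induced rule exposes its condition length and predicted class;
# the last rule in the list is the default rule, as used in run_cn2 above.
for i, rule in enumerate(classifier.rule_list[:-1]):
    print(i, rule, "len:", rule.length, "prediction:", rule.prediction)
print("default:", classifier.rule_list[-1].prediction)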
Example #12
    def __into_orange_table(self, attrs, X, meta_parts):
        if not attrs and X.shape[1]:
            attrs = Domain.from_numpy(X).attributes

        try:
            metas = None
            M = None
            if meta_parts:
                meta_parts = [
                    df_.reset_index() if not df_.index.is_integer() else df_
                    for df_ in meta_parts
                ]
                metas = [
                    StringVariable.make(name)
                    for name in chain(*(_.columns for _ in meta_parts))
                ]
                M = np.hstack(tuple(df_.values for df_ in meta_parts))

            domain = Domain(attrs, metas=metas)
            table = Table.from_numpy(domain, X, None, M)
        except ValueError:
            table = None
            rows = self.leading_cols if self.transposed else self.leading_rows
            cols = self.leading_rows if self.transposed else self.leading_cols
            self.errors["inadequate_headers"] = (rows, cols)
        return table
Example #13
    def test_latlon_detection_heuristic(self):
        xy = np.c_[np.random.uniform(-180, 180, 100),
                   np.random.uniform(-90, 90, 100)]
        data = Table.from_numpy(Domain.from_numpy(xy), xy)
        self.widget.set_data(data)

        self.assertIn(self.widget.lat_attr, data.domain)
        self.assertIn(self.widget.lon_attr, data.domain)
Example #15
 def test_from_numpy_values(self):
     for aran_min, aran_max, vartype in [(1, 3, ContinuousVariable),
                                         (0, 2, DiscreteVariable),
                                         (18, 23, ContinuousVariable)]:
         n_rows, n_cols = aran_max - aran_min, 1
         d = Domain.from_numpy(np.zeros((1, 1)), np.arange(aran_min, aran_max).reshape(n_rows, n_cols))
         self.assertTrue(d.anonymous)
         self.assertIsInstance(d.class_var, vartype)
         if vartype is DiscreteVariable:
             self.assertEqual(d.class_var.values, ["v{}".format(i) for i in range(1, 3)])
Example #16
 def test_anova(self):
     nrows, ncols = 500, 5
     X = np.random.rand(nrows, ncols)
     y = 4 + (-3 * X[:, 1] + X[:, 3]) // 2
     domain = Domain.from_numpy(X, y)
     domain = Domain(domain.attributes,
                     DiscreteVariable('c', values=np.unique(y)))
     data = Table(domain, X, y)
     scorer = ANOVA()
     sc = [scorer(data, a) for a in range(ncols)]
     self.assertTrue(np.argmax(sc) == 1)
Example #17
 def test_anova(self):
     nrows, ncols = 500, 5
     X = np.random.rand(nrows, ncols)
     y = 4 + (-3*X[:, 1] + X[:, 3]) // 2
     domain = Domain.from_numpy(X, y)
     domain = Domain(domain.attributes,
                     DiscreteVariable('c', values=np.unique(y)))
     data = Table(domain, X, y)
     scorer = score.ANOVA()
     sc = [scorer(data, a) for a in range(ncols)]
     self.assertTrue(np.argmax(sc) == 1)
Example #18
    def test_missing_values_with_no_pca_preprocessing(self):
        data = np.ones((5, 5))
        data[range(5), range(5)] = np.nan
        np.random.shuffle(data)

        table = Table.from_numpy(domain=Domain.from_numpy(X=data), X=data)
        self.send_signal(self.widget.Inputs.data, table)
        self.widget.apply_pca = False
        self.widget.commit(force=True)

        self.assertTrue(self.widget.Error.data_has_nans.is_shown())
Example #19
    def test_improved_randomized_pca_properly_called(self):
        # It doesn't matter what we put into the matrix
        x_ = np.random.normal(0, 1, (100, 20))
        x = Table.from_numpy(Domain.from_numpy(x_), x_)

        pca.randomized_pca = MagicMock(wraps=pca.randomized_pca)
        PCA(10, svd_solver="randomized", random_state=42)(x)
        pca.randomized_pca.assert_called_once()

        pca.randomized_pca.reset_mock()
        PCA(10, svd_solver="arpack", random_state=42)(x)
        pca.randomized_pca.assert_not_called()
Example #20
 def test_chi2(self):
     nrows, ncols = 500, 5
     X = np.random.randint(4, size=(nrows, ncols))
     y = 10 + (-3 * X[:, 1] + X[:, 3]) // 2
     domain = Domain.from_numpy(X, y)
     domain = Domain(domain.attributes,
                     DiscreteVariable('c', values=np.unique(y)))
     table = Table(domain, X, y)
     data = preprocess.Discretize()(table)
     scorer = Chi2()
     sc = [scorer(data, a) for a in range(ncols)]
     self.assertTrue(np.argmax(sc) == 1)
Example #21
    def test_improved_randomized_pca_properly_called(self):
        # It doesn't matter what we put into the matrix
        x_ = np.random.normal(0, 1, (100, 20))
        x = Table.from_numpy(Domain.from_numpy(x_), x_)

        pca.randomized_pca = MagicMock(wraps=pca.randomized_pca)
        PCA(10, svd_solver="randomized", random_state=42)(x)
        pca.randomized_pca.assert_called_once()

        pca.randomized_pca.reset_mock()
        PCA(10, svd_solver="arpack", random_state=42)(x)
        pca.randomized_pca.assert_not_called()
Example #22
 def test_chi2(self):
     nrows, ncols = 500, 5
     X = np.random.randint(4, size=(nrows, ncols))
     y = 10 + (-3*X[:, 1] + X[:, 3]) // 2
     domain = Domain.from_numpy(X, y)
     domain = Domain(domain.attributes,
                     DiscreteVariable('c', values=np.unique(y)))
     table = Table(domain, X, y)
     data = preprocess.Discretize()(table)
     scorer = score.Chi2()
     sc = [scorer(data, a) for a in range(ncols)]
     self.assertTrue(np.argmax(sc) == 1)
Example #23
    def test_from_numpy_names(self):
        for n_cols, name in [(5, "Feature {}"),
                             (99, "Feature {:02}"),
                             (100, "Feature {:03}")]:
            d = Domain.from_numpy(np.zeros((1, n_cols)))
            self.assertTrue(d.anonymous)
            self.assertEqual([var.name for var in d.attributes],
                             [name.format(i) for i in range(1, n_cols+1)])

        d = Domain.from_numpy(np.zeros((1, 1)))
        self.assertTrue(d.anonymous)
        self.assertEqual(d.attributes[0].name, "Feature")

        d = Domain.from_numpy(np.zeros((1, 3)), np.zeros((1, 1)),
                              np.zeros((1, 100)))
        self.assertTrue(d.anonymous)
        self.assertEqual([var.name for var in d.attributes],
                         ["Feature {}".format(i) for i in range(1, 4)])
        self.assertEqual(d.class_var.name, "Target")
        self.assertEqual([var.name for var in d.metas],
                         ["Meta {:03}".format(i) for i in range(1, 101)])
Example #24
    def test_improved_randomized_pca_sparse_data(self):
        """Randomized PCA should work well on dense data."""
        random_state = check_random_state(42)

        # Let's take a tall, skinny matrix
        x_ = random_state.negative_binomial(1, 0.5, (100, 20))
        x = Table.from_numpy(Domain.from_numpy(x_), x_).to_sparse()

        pca = PCA(10, svd_solver="full",
                  random_state=random_state)(x.to_dense())
        rpca = PCA(10, svd_solver="randomized", random_state=random_state)(x)

        np.testing.assert_almost_equal(pca.components_,
                                       rpca.components_,
                                       decimal=8)
        np.testing.assert_almost_equal(pca.explained_variance_,
                                       rpca.explained_variance_,
                                       decimal=8)
        np.testing.assert_almost_equal(pca.singular_values_,
                                       rpca.singular_values_,
                                       decimal=8)

        # And take a short, fat matrix
        x_ = random_state.negative_binomial(1, 0.5, (20, 100))
        x = Table.from_numpy(Domain.from_numpy(x_), x_).to_sparse()

        pca = PCA(10, svd_solver="full",
                  random_state=random_state)(x.to_dense())
        rpca = PCA(10, svd_solver="randomized", random_state=random_state)(x)

        np.testing.assert_almost_equal(pca.components_,
                                       rpca.components_,
                                       decimal=8)
        np.testing.assert_almost_equal(pca.explained_variance_,
                                       rpca.explained_variance_,
                                       decimal=8)
        np.testing.assert_almost_equal(pca.singular_values_,
                                       rpca.singular_values_,
                                       decimal=8)
Example #25
    def test_rrelieff(self):
        X = np.random.random((100, 5))
        y = ((X[:, 0] > .5) ^ (X[:, 1] < .5) - 1).astype(float)
        xor = Table.from_numpy(Domain.from_numpy(X, y), X, y)

        scorer = score.RReliefF()
        weights = scorer(xor, None)
        best = {xor.domain[attr].name for attr in weights.argsort()[-2:]}
        self.assertSetEqual(set(a.name for a in xor.domain.attributes[:2]), best)

        weights = scorer(self.housing, None)
        best = {self.housing.domain[attr].name for attr in weights.argsort()[-6:]}
        for feature in ('LSTAT', 'RM', 'AGE'):
            self.assertIn(feature, best)
Example #26
    def test_rrelieff(self):
        X = np.random.random((100, 5))
        y = ((X[:, 0] > .5) ^ (X[:, 1] < .5) - 1).astype(float)
        xor = Table.from_numpy(Domain.from_numpy(X, y), X, y)

        scorer = score.RReliefF()
        weights = scorer(xor, None)
        best = {xor.domain[attr].name for attr in weights.argsort()[-2:]}
        self.assertSetEqual(set(a.name for a in xor.domain.attributes[:2]), best)

        weights = scorer(self.housing, None)
        best = {self.housing.domain[attr].name for attr in weights.argsort()[-6:]}
        for feature in ('LSTAT', 'RM', 'AGE'):
            self.assertIn(feature, best)
Example #27
    def test_improved_randomized_pca_dense_data(self):
        """Randomized PCA should work well on dense data."""
        random_state = check_random_state(42)

        # Let's take a tall, skinny matrix
        x_ = random_state.normal(0, 1, (100, 20))
        x = Table.from_numpy(Domain.from_numpy(x_), x_)

        pca = PCA(10, svd_solver="full", random_state=random_state)(x)
        rpca = PCA(10, svd_solver="randomized", random_state=random_state)(x)

        np.testing.assert_almost_equal(
            pca.components_, rpca.components_, decimal=8
        )
        np.testing.assert_almost_equal(
            pca.explained_variance_, rpca.explained_variance_, decimal=8
        )
        np.testing.assert_almost_equal(
            pca.singular_values_, rpca.singular_values_, decimal=8
        )

        # And take a short, fat matrix
        x_ = random_state.normal(0, 1, (20, 100))
        x = Table.from_numpy(Domain.from_numpy(x_), x_)

        pca = PCA(10, svd_solver="full", random_state=random_state)(x)
        rpca = PCA(10, svd_solver="randomized", random_state=random_state)(x)

        np.testing.assert_almost_equal(
            pca.components_, rpca.components_, decimal=8
        )
        np.testing.assert_almost_equal(
            pca.explained_variance_, rpca.explained_variance_, decimal=8
        )
        np.testing.assert_almost_equal(
            pca.singular_values_, rpca.singular_values_, decimal=8
        )
Example #28
    def test_clusters_ordered_by_size(self):
        """Cluster names should be sorted based on the number of instances."""
        x1 = np.array([[0, 0]] * 20)
        x2 = np.array([[1, 0]] * 15)
        x3 = np.array([[0, 1]] * 10)
        x4 = np.array([[1, 1]] * 5)
        data = np.vstack((x1, x2, x3, x4))
        # Remove any order dependence in the data, not that this should affect it
        np.random.shuffle(data)

        table = Table.from_numpy(domain=Domain.from_numpy(X=data), X=data)
        self.send_signal(self.widget.Inputs.data, table)
        self.widget.k_neighbours = 4
        self.widget.commit(force=True)
        output = self.get_output(self.widget.Outputs.annotated_data, wait=1000)

        clustering = output.get_column_view('Cluster')[0].astype(int)
        counts = np.bincount(clustering)
        np.testing.assert_equal(counts, sorted(counts, reverse=True))
Example #29
    def __into_orange_table(self, attrs, X, meta_parts):
        if not attrs and X.shape[1]:
            attrs = Domain.from_numpy(X).attributes

        try:
            metas = None
            M = None
            if meta_parts:
                meta_parts = [df_.reset_index() if not df_.index.is_integer()
                              else df_ for df_ in meta_parts]
                metas, M = self.__guess_metas(meta_parts)

            domain = Domain(attrs, metas=metas)
            table = Table.from_numpy(domain, X, None, M)
        except ValueError:
            table = None
            rows = self.leading_cols if self.transposed else self.leading_rows
            cols = self.leading_rows if self.transposed else self.leading_cols
            self.errors["inadequate_headers"] = (rows, cols)
        return table
Example #30
    def test_clusters_ordered_by_size(self):
        """Cluster names should be sorted based on the number of instances."""
        x1 = np.array([[0, 0]] * 20)
        x2 = np.array([[1, 0]] * 15)
        x3 = np.array([[0, 1]] * 10)
        x4 = np.array([[1, 1]] * 5)
        data = np.vstack((x1, x2, x3, x4))
        # Remove any order dependence in the data, not that this should affect it
        np.random.shuffle(data)

        table = Table.from_numpy(domain=Domain.from_numpy(X=data), X=data)

        self.send_signal(self.widget.Inputs.data, table)
        self.widget.k_neighbors = 4
        self.commit_and_wait()
        output = self.get_output(self.widget.Outputs.annotated_data)

        clustering = output.get_column_view('Cluster')[0].astype(int)
        counts = np.bincount(clustering)
        np.testing.assert_equal(counts, sorted(counts, reverse=True))
Example #31
    def test_rrelieff(self):
        X = np.random.random((100, 5))
        y = ((X[:, 0] > 0.5) ^ (X[:, 1] < 0.5) - 1).astype(float)
        xor = Table.from_numpy(Domain.from_numpy(X, y), X, y)

        scorer = RReliefF(random_state=42)
        weights = scorer(xor, None)
        best = {xor.domain[attr].name for attr in weights.argsort()[-2:]}
        self.assertSetEqual(set(a.name for a in xor.domain.attributes[:2]),
                            best)
        weights = scorer(self.housing, None)
        best = {
            self.housing.domain[attr].name
            for attr in weights.argsort()[-6:]
        }
        for feature in ("LSTAT", "RM"):
            self.assertIn(feature, best)

        np.testing.assert_array_equal(
            RReliefF(random_state=1)(self.housing, None),
            RReliefF(random_state=1)(self.housing, None),
        )
Example #32
train_disc_pts1 = create_disc_pts(75, 2)
train_disc_pts2 = create_disc_pts(75, 3.5, 2)

plt.figure()
plt.scatter(train_disc_pts1[:, 0], train_disc_pts1[:, 1], c='r')
plt.scatter(train_disc_pts2[:, 0], train_disc_pts2[:, 1], c='b')
bound_ang = np.arange(0, 2 * np.pi, 0.01)
plt.plot(2 * np.cos(bound_ang), 2 * np.sin(bound_ang))
plt.xlim(-4, 4)
plt.ylim(-4, 4)
plt.show()

train_disc_pts = np.append(train_disc_pts1, train_disc_pts2, axis=0)
train_disc_pt_labels = np.append(np.zeros(75), np.ones(75))
train_disc_data_domain = Domain.from_numpy(train_disc_pts, train_disc_pt_labels)
train_disc_data_tab = Table.from_numpy(train_disc_data_domain, train_disc_pts,
                                       train_disc_pt_labels)


print("###########TASK 5###################")
non_linear_learner = SVMLearner()
eval_results = CrossValidation(train_disc_data_tab, [non_linear_learner], k=10)
#Accuracy of cross validation: 0.960
#AUC: 0.959
print("Accuracy of cross validation: {:.3f}".format(scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))


print("###########EXERCISE 1###############")
non_linear_learner = SVMLearner()
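The script above calls a helper create_disc_pts that is not included in the excerpt. Judging only from how it is called and plotted (two point sets separated by the circle of radius 2), a plausible, purely hypothetical implementation could look like the following — the sampling scheme is an assumption, not the original code:

import numpy as np

def create_disc_pts(n, outer_radius, inner_radius=0):
    # Hypothetical helper: sample n points with uniformly distributed angle
    # and radius inside the annulus between inner_radius and outer_radius.
    angles = np.random.uniform(0, 2 * np.pi, n)
    radii = np.random.uniform(inner_radius, outer_radius, n)
    return np.column_stack((radii * np.cos(angles), radii * np.sin(angles)))

With such a helper, create_disc_pts(75, 2) fills the inner disc and create_disc_pts(75, 3.5, 2) fills the ring outside it, consistent with the decision boundary drawn at radius 2 in the plot.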