Пример #1
0
    def test_store_models(self):
        """Check that fitted models are kept only when ``store_models=True``.

        Both ways of invoking ``testing.TestOnTrainingData`` are exercised:
        constructing it first and calling the instance afterwards, and
        passing the data and fitters straight to the constructor.
        """
        nrows, ncols = 50, 10
        t = random_data(nrows, ncols)
        fitters = [naive_bayes.BayesLearner(), majority.MajorityFitter()]
        # Expected model class for each fitter, in the same order as `fitters`.
        model_types = [naive_bayes.BayesClassifier,
                       majority.ConstantClassifier]

        def assert_models_stored(res):
            # One group of models is expected, with one model per fitter.
            self.assertEqual(len(res.models), 1)
            for models in res.models:
                self.assertEqual(len(models), 2)
                for model, model_type in zip(models, model_types):
                    self.assertIsInstance(model, model_type)

        # Deferred call: configure first, then apply to data and fitters.
        cv = testing.TestOnTrainingData()
        res = cv(t, fitters)
        self.assertIsNone(res.models)

        cv = testing.TestOnTrainingData(store_models=True)
        res = cv(t, fitters)
        assert_models_stored(res)

        # Direct call: data and fitters are given to the constructor itself.
        res = testing.TestOnTrainingData(t, fitters)
        self.assertIsNone(res.models)

        res = testing.TestOnTrainingData(t, fitters, store_models=True)
        assert_models_stored(res)
Пример #2
0
    def test_bayes(self):
        """Check naive Bayes accuracy when the class copies one attribute.

        The class column is a copy of a randomly chosen attribute column, so
        testing on the training data is expected to give an accuracy of 1.
        After the class of the last 20 rows is rewritten, the accuracy must
        stay at or above 0.75 but fall below 1.
        """
        # randint excludes the upper bound, so (1, 4) draws from {1, 2, 3};
        # this replaces the deprecated np.random.random_integers(1, 3, ...).
        x = np.random.randint(1, 4, (100, 5))
        col = np.random.randint(5)
        y = x[:, col].copy().reshape(100, 1)
        t = Table(x, y)
        t = discretization.DiscretizeTable(t, method=EqualWidth(n=3))

        res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
        np.testing.assert_almost_equal(scoring.CA(res), [1])

        # Rewrite the class of the last 20 rows as 4 - value, so some of
        # them presumably no longer agree with the copied attribute.
        t.Y[-20:] = 4 - t.Y[-20:]
        res = testing.TestOnTrainingData(t, [naive_bayes.BayesLearner()])
        ca = scoring.CA(res)[0]
        self.assertGreaterEqual(ca, 0.75)
        self.assertLess(ca, 1)
Пример #3
0
    def test_miss_majority(self):
        """Check the predictions of the majority fitter on near-constant data.

        With 49 all-zero rows and one differing row, the first 49 predictions
        must be 0.  With the data split evenly between two class values, all
        predictions must agree with the first one.
        """
        def majority_predictions(table):
            # Predictions of a fresh majority fitter tested on its own data.
            outcome = testing.TestOnTrainingData(
                table, [majority.MajorityFitter()])
            return outcome.predicted[0]

        features = np.zeros((50, 3))
        # `target` is a view of the last column, so the row assignments
        # below change it as well.
        target = features[:, -1]
        features[49] = 1
        table = Table(features, target)
        np.testing.assert_equal(majority_predictions(table)[:49], 0)

        # The array is reset, but `table` is deliberately not rebuilt here.
        features[49] = 0
        np.testing.assert_equal(majority_predictions(table)[:49], 0)

        # Rows 25..49 become ones: both class values now occur 25 times.
        features[25:] = 1
        target = features[:, -1]
        table = Table(features, target)
        predicted = majority_predictions(table)
        np.testing.assert_equal(predicted, predicted[0])
Пример #4
0
    def test_store_data(self):
        """Check that results reference the data only when ``store_data=True``.

        Covers both the deferred call (instance created first, then applied)
        and the direct call (data and fitters passed to the constructor).
        """
        n_rows, n_cols = 50, 10
        table = random_data(n_rows, n_cols)
        learners = [naive_bayes.BayesLearner()]

        # Deferred call, default settings: no data on the results.
        plain_tester = testing.TestOnTrainingData()
        self.assertIsNone(plain_tester(table, learners).data)

        # Deferred call with store_data: the very same table object is kept.
        storing_tester = testing.TestOnTrainingData(store_data=True)
        self.assertIs(storing_tester(table, learners).data, table)

        # Direct call, default settings.
        direct_plain = testing.TestOnTrainingData(table, learners)
        self.assertIsNone(direct_plain.data)

        # Direct call with store_data.
        direct_storing = testing.TestOnTrainingData(
            table, learners, store_data=True)
        self.assertIs(direct_storing.data, table)
Пример #5
0
    def test_probs(self):
        """Check predictions and class probabilities of two majority fitters.

        On rows 30..129 of the iris table both fitters must predict class 1
        for every row, and the predicted probabilities must be ordered
        class 0 < class 2 < class 1.
        """
        table = Table('iris')[30:130]
        learners = [majority.MajorityFitter(), majority.MajorityFitter()]

        # NOTE(review): k=10 looks like a cross-validation setting; whether
        # TestOnTrainingData uses or ignores it is not visible here - confirm.
        evaluator = testing.TestOnTrainingData(k=10)
        outcome = evaluator(table, learners)

        # One row of predictions per learner, one column per data row.
        self.assertEqual(outcome.predicted.shape, (2, len(table)))
        np.testing.assert_equal(outcome.predicted, np.ones((2, 100)))

        class_probs = outcome.probabilities
        p_first, p_second, p_third = (
            class_probs[:, :, index] for index in range(3))
        self.assertTrue((p_first < p_third).all())
        self.assertTrue((p_third < p_second).all())
Пример #6
0
 def test_results(self):
     """Check the result arrays of naive Bayes tested on its training data.

     The actual values, the predictions and the most probable classes must
     all equal the class column taken in the order of ``row_indices``, and
     ``row_indices`` must enumerate the rows in their original order.
     """
     n_rows, n_cols = 50, 10
     table = random_data(n_rows, n_cols)
     outcome = testing.TestOnTrainingData(table, [naive_bayes.BayesLearner()])

     # Class values arranged in the order in which rows were tested.
     expected = table.Y[outcome.row_indices].reshape(n_rows)

     np.testing.assert_equal(outcome.actual, expected)
     np.testing.assert_equal(outcome.predicted[0], expected)
     most_probable = np.argmax(outcome.probabilities[0], axis=1)
     np.testing.assert_equal(most_probable, expected)
     np.testing.assert_equal(outcome.row_indices, np.arange(n_rows))
Пример #7
0
    def update_results(self):
        """Evaluate the learners that have no results yet and store them.

        Learners whose stored ``results`` is None are run through the
        resampling method selected in ``self.resampling``.  The combined
        results are split per learner, scored with classification or
        regression statistics depending on the class variable of the
        training data, and written back into ``self.learners``.

        Raises:
            AssertionError: if "test on test data" is selected without test
                data, or if ``self.resampling`` is not a known method.
        """
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        self.setStatusMessage("Running")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(self.train_data,
                                              learners,
                                              k=self.k_folds,
                                              store_data=True)
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(self.train_data,
                                          learners,
                                          store_data=True)
        elif self.resampling == OWTestLearners.Bootstrap:
            # sample_p is divided by 100 to turn a percentage into a fraction.
            p = self.sample_p / 100.0
            results = testing.Bootstrap(self.train_data,
                                        learners,
                                        n_resamples=self.n_repeat,
                                        p=p,
                                        store_data=True)
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(self.train_data,
                                                 learners,
                                                 store_data=True)
        elif self.resampling == OWTestLearners.TestOnTest:
            # Raised explicitly so the check also holds when Python runs
            # with -O, which strips `assert` statements.
            if self.test_data is None:
                raise AssertionError(
                    "test data is required to test on test data")
            results = testing.TestOnTestData(self.train_data,
                                             self.test_data,
                                             learners,
                                             store_data=True)
        else:
            # Without this, an unknown method under -O would fall through
            # and fail later on the unbound `results`.
            raise AssertionError(
                "unknown resampling method: %r" % (self.resampling,))

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats

        stats = [test_stats(res) for res in results]
        for (key, slot), res, stat in zip(items, results, stats):
            self.learners[key] = slot._replace(results=res, stats=stat)

        self.setStatusMessage("")
        self._update_stats_model()
Пример #8
0
    def update_results(self):
        """Evaluate the learners that have no results yet and store them.

        Returns early when there is no training data, or when "test on test
        data" is selected but the test data is missing or has a different
        class variable than the training data.  Otherwise the learners whose
        stored ``results`` is None are run through the selected resampling
        method, scored with classification or regression statistics, and
        written back into ``self.learners``.

        Raises:
            AssertionError: if ``self.resampling`` is not a known method.
        """
        # Presumably resets warnings 1 and 2 and error 2 before the checks
        # below may raise them again - confirm against the widget base class.
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            # sample_p is divided by 100 to turn a percentage into a fraction.
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            # The guard near the top has already returned if test_data is
            # None, so no second check is needed here.
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            # Raised explicitly because `assert False` is stripped under -O,
            # which would leave `results` unbound.
            raise AssertionError(
                "unknown resampling method: %r" % (self.resampling,))

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats

        self._update_header()

        stats = [test_stats(res) for res in results]
        for (key, slot), res, stat in zip(items, results, stats):
            self.learners[key] = slot._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()