def test__prediction_to_row(self):
        repeat_nr = 0
        fold_nr = 0
        clf = sklearn.pipeline.Pipeline(
            steps=[('Imputer', Imputer(strategy='mean')
                    ), ('VarianceThreshold', VarianceThreshold(
                        threshold=0.05)), ('Estimator', GaussianNB())])
        task = openml.tasks.get_task(20)
        train, test = task.get_train_test_split_indices(repeat_nr, fold_nr)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        test_X = X[test]
        test_y = y[test]

        probaY = clf.predict_proba(test_X)
        predY = clf.predict(test_X)
        sample_nr = 0  # default for this task
        for idx in range(0, len(test_X)):
            arff_line = _prediction_to_row(repeat_nr, fold_nr, sample_nr, idx,
                                           task.class_labels[test_y[idx]],
                                           predY[idx], probaY[idx],
                                           task.class_labels, clf.classes_)

            self.assertIsInstance(arff_line, list)
            self.assertEqual(len(arff_line), 6 + len(task.class_labels))
            self.assertEqual(arff_line[0], repeat_nr)
            self.assertEqual(arff_line[1], fold_nr)
            self.assertEqual(arff_line[2], sample_nr)
            self.assertEqual(arff_line[3], idx)
            sum = 0.0
            for att_idx in range(4, 4 + len(task.class_labels)):
                self.assertIsInstance(arff_line[att_idx], float)
                self.assertGreaterEqual(arff_line[att_idx], 0.0)
                self.assertLessEqual(arff_line[att_idx], 1.0)
                sum += arff_line[att_idx]
            self.assertAlmostEqual(sum, 1.0)

            self.assertIn(arff_line[-1], task.class_labels)
            self.assertIn(arff_line[-2], task.class_labels)
        pass
Пример #2
0
    def test__prediction_to_row(self):
        repeat_nr = 0
        fold_nr = 0
        clf = sklearn.pipeline.Pipeline(steps=[('Imputer', Imputer(strategy='mean')),
                                               ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
                                               ('Estimator', GaussianNB())])
        task = openml.tasks.get_task(20)
        train, test = task.get_train_test_split_indices(repeat_nr, fold_nr)
        X, y = task.get_X_and_y()
        clf.fit(X[train], y[train])

        test_X = X[test]
        test_y = y[test]

        probaY = clf.predict_proba(test_X)
        predY = clf.predict(test_X)
        for idx in range(0, len(test_X)):
            arff_line = _prediction_to_row(repeat_nr, fold_nr, idx,
                                           task.class_labels[test_y[idx]],
                                           predY[idx], probaY[idx], task.class_labels, clf.classes_)

            self.assertIsInstance(arff_line, list)
            self.assertEqual(len(arff_line), 5 + len(task.class_labels))
            self.assertEqual(arff_line[0], repeat_nr)
            self.assertEqual(arff_line[1], fold_nr)
            self.assertEqual(arff_line[2], idx)
            sum = 0.0
            for att_idx in range(3, 3 + len(task.class_labels)):
                self.assertIsInstance(arff_line[att_idx], float)
                self.assertGreaterEqual(arff_line[att_idx], 0.0)
                self.assertLessEqual(arff_line[att_idx], 1.0)
                sum += arff_line[att_idx]
            self.assertAlmostEqual(sum, 1.0)

            self.assertIn(arff_line[-1], task.class_labels)
            self.assertIn(arff_line[-2], task.class_labels)
        pass