Пример #1
0
 def test_query(self):
     """Smoke-test ``QBC.query`` for several kinds of ensembles.

     Four ensemble representations are tried: the fixture ensemble, a
     plain list of classifiers, a bagging wrapper and a soft-voting
     wrapper.  For each of them and for both disagreement measures the
     query must return exactly one index and one row of utilities.
     """
     # Committee expressed as a plain list of three separate classifiers.
     committee = [
         SklearnClassifier(classes=self.classes,
                           estimator=GaussianProcessClassifier())
         for _ in range(3)
     ]
     bagged = SklearnClassifier(
         estimator=BaggingClassifier(
             base_estimator=PWC(classes=self.classes)),
         classes=self.classes)
     voted = SklearnClassifier(
         VotingClassifier(estimators=committee, voting='soft'))

     for ensemble in (self.ensemble, committee, bagged, voted):
         for method in ('KL_divergence', 'vote_entropy'):
             qbc = QBC(method=method)
             indices, utilities = qbc.query(X_cand=self.X_cand,
                                            ensemble=ensemble,
                                            X=self.X,
                                            y=self.y,
                                            return_utilities=True)
             self.assertEqual(len(indices), 1)
             self.assertEqual(len(utilities), 1)
 def test_init_param_estimator(self):
     """Check how ``SklearnClassifier`` treats its ``estimator`` parameter.

     The constructor is expected to store the argument as given (checked
     through the ``estimator`` attribute), while ``fit`` is expected to
     raise ``TypeError`` when the wrapped estimator is a
     ``GaussianProcessRegressor``.
     """
     # The constructor stores the value as passed.  (The original repeated
     # this check twice verbatim; one copy is sufficient.)
     clf = SklearnClassifier(estimator='Test')
     self.assertEqual(clf.estimator, 'Test')

     # Fitting with a regressor as the wrapped estimator must fail.
     clf = SklearnClassifier(missing_label='nan',
                             estimator=GaussianProcessRegressor())
     self.assertRaises(TypeError, clf.fit, X=self.X, y=self.y1)
 def test_fit(self):
     """Check ``classes`` handling and a failing ``fit`` of the ensemble.

     First the ``classes`` attribute of the ensemble is compared with the
     one of two classifiers created with the same class list.  Then
     ``fit`` is called with a single column of ``self.y`` and is expected
     to raise ``ValueError``.
     """
     pwc_clf = PWC(classes=[1, 2])
     gnb_clf = SklearnClassifier(GaussianNB(), classes=[1, 2])
     ensemble = MultiAnnotEnsemble(estimators=[('PWC', pwc_clf)],
                                   classes=[1, 2])
     for member in (gnb_clf, pwc_clf):
         np.testing.assert_array_equal(ensemble.classes, member.classes)

     pwc_clf = PWC(classes=np.arange(3))
     gnb_clf = SklearnClassifier(GaussianNB(), classes=np.arange(3))
     ensemble = MultiAnnotEnsemble(
         estimators=[('PWC', pwc_clf), ('GNB', gnb_clf)],
         voting='soft',
         classes=np.arange(3))
     # Only the first column of the label array is passed here.
     first_column = self.y[:, 0]
     self.assertRaises(ValueError, ensemble.fit, X=self.X, y=first_column)
    def test_epistemic_uncertainty_logreg(self):
        """Exercise ``_epistemic_uncertainty_logreg`` with bad and good input.

        Three classifiers are expected to be rejected: a logistic
        regression declared with three classes (``ValueError``), a decision
        tree (``TypeError``) and the fixture classifier ``self.clf``
        (``TypeError``).  Finally the function is called once on a tiny
        data set with a fitted logistic regression.
        """
        rejected = [
            (ValueError,
             SklearnClassifier(LogisticRegression(),
                               classes=[0, 1, 2],
                               random_state=self.random_state)),
            (TypeError,
             SklearnClassifier(DecisionTreeClassifier(),
                               classes=[0, 1],
                               random_state=self.random_state)),
            (TypeError, self.clf),
        ]
        for expected_error, bad_clf in rejected:
            self.assertRaises(expected_error,
                              _epistemic_uncertainty_logreg,
                              **self.kwargs,
                              clf=bad_clf)

        # TODO: no expected utilities are asserted here; the call below only
        # verifies that the function runs on this input.
        X_train = np.array([[0]])
        y_train = np.array([0])
        X_query = np.array([[3]])
        probas = np.array([[0.5, 0.5]])
        log_reg = SklearnClassifier(LogisticRegression(), classes=[0, 1])
        log_reg.fit(X_train, y_train)
        _epistemic_uncertainty_logreg(X_query, X_train, y_train, log_reg,
                                      probas)
Пример #5
0
    def setUp(self):
        """Create the data set, classifiers and query-strategy registry.

        Builds a small two-cluster data set, three classifiers sharing the
        same class labels and missing-label marker, and a dictionary that
        maps the name of every class exported by ``pool`` which derives
        from ``SingleAnnotPoolBasedQueryStrategy`` to the class itself.
        """
        self.MISSING_LABEL = MISSING_LABEL
        self.X, self.y_true = make_blobs(n_samples=10,
                                         n_features=2,
                                         centers=2,
                                         cluster_std=1,
                                         random_state=1)
        self.budget = 5
        self.clf = PWC(classes=np.unique(self.y_true),
                       missing_label=MISSING_LABEL,
                       random_state=0)
        self.cmm = CMM(classes=np.unique(self.y_true),
                       missing_label=MISSING_LABEL,
                       random_state=0)
        self.ensemble = SklearnClassifier(
            classes=np.unique(self.y_true),
            missing_label=MISSING_LABEL,
            estimator=RandomForestClassifier(random_state=0),
            random_state=0)

        self.y_missing_label = np.full(self.y_true.shape, self.MISSING_LABEL)
        # NOTE(review): the original additionally executed
        # ``self.y[:3] = self.y_true[:3]`` right after this copy, which
        # cannot change anything.  Presumably ``self.y`` was meant to start
        # from ``self.y_missing_label`` with only three known labels --
        # confirm before changing the fixture; the effective value (a full
        # copy of the true labels) is kept here.
        self.y = self.y_true.copy()

        # Collect every exported single-annotator pool-based strategy.
        # (A debug ``print`` of the collected names was removed: it was
        # emitted before every single test.)
        self.query_strategies = {}
        for qs_name in pool.__all__:
            qs = getattr(pool, qs_name)
            if inspect.isclass(qs) and \
                    issubclass(qs, SingleAnnotPoolBasedQueryStrategy):
                self.query_strategies[qs_name] = qs
 def test_init_param_voting(self):
     """``fit`` must raise ``ValueError`` for invalid ``voting`` values.

     Both a string value and an integer value are tried.
     """
     estimators = [('pwc', PWC()), ('gnb', SklearnClassifier(GaussianNB()))]
     for invalid_voting in ('Test', 1):
         ensemble = MultiAnnotEnsemble(estimators=estimators,
                                       voting=invalid_voting)
         self.assertRaises(ValueError, ensemble.fit, X=self.X, y=self.y)
 def test_query_param_clf(self):
     """``EpistemicUncertainty.query`` must raise ``TypeError`` for bad ``clf``.

     ``None``, a string, an integer and a wrapped decision tree are each
     passed as ``clf``.
     """
     selector = EpistemicUncertainty()
     unsupported = (None, 'string', 1,
                    SklearnClassifier(DecisionTreeClassifier()))
     for bad_clf in unsupported:
         self.assertRaises(TypeError,
                           selector.query,
                           **self.kwargs,
                           clf=bad_clf)
Пример #8
0
 def setUp(self):
     """Prepare candidates, training data, labels and an ensemble fixture."""
     self.random_state = 41
     # Three candidate samples and four training samples, four features each.
     self.X_cand = [[8, 1, 6, 8], [9, 1, 6, 5], [5, 1, 6, 5]]
     self.X = [[1, 2, 5, 9], [5, 8, 4, 6], [8, 4, 5, 9], [5, 4, 8, 5]]
     self.y = [0., 0., 1., 1.]
     self.classes = [0, 1]
     forest = RandomForestClassifier(random_state=0)
     self.ensemble = SklearnClassifier(estimator=forest,
                                       classes=self.classes,
                                       random_state=self.random_state)
    def test_query(self):
        """Exercise ``EpistemicUncertainty.query`` end to end.

        Covers the ``return_utilities`` switch, the ``batch_size``
        parameter, querying with a ``PWC`` and with a wrapped logistic
        regression (including the missing-label fixture), and finally
        checks that passing ``sample_weight`` changes the utilities.
        """
        def query_and_check(clf):
            # Run the plain and missing-label queries, then verify the
            # shapes of the returned indices and utilities.
            strategy = EpistemicUncertainty()
            strategy.query(**self.kwargs, clf=clf)
            strategy.query(**self.kwargs_MISSING_LABEL, clf=clf)
            indices, utils = strategy.query(**self.kwargs,
                                            clf=clf,
                                            return_utilities=True)
            self.assertEqual(utils.shape, (1, len(self.X_cand)))
            self.assertEqual(indices.shape, (1, ))
            return strategy, utils

        # return_utilities
        selector = EpistemicUncertainty()
        with_utilities = list(
            selector.query(**self.kwargs, clf=self.clf,
                           return_utilities=True))
        self.assertEqual(len(with_utilities), 2)
        without_utilities = list(
            selector.query(**self.kwargs, clf=self.clf,
                           return_utilities=False))
        self.assertEqual(len(without_utilities), 1)

        # batch_size
        batch_size = 3
        selector = EpistemicUncertainty()
        batch = selector.query(**self.kwargs,
                               clf=self.clf,
                               batch_size=batch_size)
        self.assertEqual(batch_size, len(batch))

        # query - PWC
        query_and_check(
            PWC(classes=self.classes, random_state=self.random_state))

        # query - logistic regression
        log_reg = SklearnClassifier(LogisticRegression(),
                                    classes=self.classes,
                                    random_state=self.random_state)
        selector, utilities = query_and_check(log_reg)

        # Down-weighting one sample must alter at least one utility.
        _, weighted_utilities = selector.query(**self.kwargs,
                                               clf=log_reg,
                                               return_utilities=True,
                                               sample_weight=[0.5, 1, 1, 1])
        unchanged = weighted_utilities == utilities
        self.assertFalse(unchanged.all())
 def test_predict_proba(self):
     """Predicted probabilities must sum to one for both voting modes.

     Also checks that ``predict_proba`` raises ``NotFittedError`` before
     the first call to ``fit``.
     """
     members = [('PWC', PWC()), ('GNB', SklearnClassifier(GaussianNB()))]
     ensemble = MultiAnnotEnsemble(estimators=members, voting='soft')
     self.assertRaises(NotFittedError, ensemble.predict_proba, X=self.X)

     for voting in ('soft', 'hard'):
         ensemble.voting = voting
         ensemble.fit(X=self.X, y=self.y)
         proba = ensemble.predict_proba(X=self.X)
         # Every row is expected to be a normalised distribution.
         np.testing.assert_allclose(np.ones(len(proba)), proba.sum(axis=1))
Пример #11
0
 def setUp(self):
     """Prepare data, a classifier and the default ``query`` keyword set."""
     self.random_state = 1
     self.classes = [0, 1]
     self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
     self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
     self.y = np.array([0, 0, 1, 1])
     self.clf = SklearnClassifier(GaussianProcessClassifier(),
                                  classes=self.classes)
     self.cost_matrix = np.eye(2)
     # Keyword arguments shared by the tests when calling ``query``.
     self.kwargs = {
         'X_cand': self.X_cand,
         'clf': self.clf,
         'X': self.X,
         'y': self.y,
     }
Пример #12
0
    def test_query(self):
        """Exercise ``UncertaintySampling.query`` for all tested methods.

        Checks the ``return_utilities`` switch and ``batch_size``, verifies
        that ``entropy``, ``margin_sampling`` and ``least_confident`` pick
        the same candidate on the fixture data, runs the two cost-sensitive
        variants on a one-sample problem, and checks the output shapes for
        ``expected_average_precision``.
        """
        clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                random_state=self.random_state,
                                classes=self.classes)

        def smoke_query(strategy):
            # One candidate, one training sample whose label is missing.
            strategy.query(X_cand=[[1]], clf=clf, X=[[1]], y=[MISSING_LABEL])

        # return_utilities
        selector = UncertaintySampling()
        with_utilities = list(
            selector.query(**self.kwargs, return_utilities=True))
        self.assertEqual(len(with_utilities), 2)
        without_utilities = list(
            selector.query(**self.kwargs, return_utilities=False))
        self.assertEqual(len(without_utilities), 1)

        # batch_size
        batch_size = 3
        selector = UncertaintySampling()
        batch = selector.query(**self.kwargs, batch_size=batch_size)
        self.assertEqual(batch_size, len(batch))

        # query: the three basic measures must agree on the fixture data
        selections = []
        for method in ('entropy', 'margin_sampling', 'least_confident'):
            selector = UncertaintySampling(method=method)
            smoke_query(selector)
            selections.append(selector.query(**self.kwargs))

        # cost-sensitive variants are only run, not compared
        for method in ('margin_sampling', 'least_confident'):
            selector = UncertaintySampling(method=method,
                                           cost_matrix=[[0, 1], [1, 0]])
            smoke_query(selector)

        reference = selections[0]
        for selection in selections:
            self.assertEqual(reference, selection)

        selector = UncertaintySampling(method='expected_average_precision')
        smoke_query(selector)
        best_indices, utilities = selector.query(**self.kwargs,
                                                 return_utilities=True)
        self.assertEqual(utilities.shape, (1, len(self.X_cand)))
        self.assertEqual(best_indices.shape, (1, ))
Пример #13
0
 def test_query_param_ensemble(self):
     """``QBC.query`` must raise ``TypeError`` for unsupported ensembles."""
     selector = QBC()
     unsupported = (
         None,
         'test',
         1,
         GaussianProcessClassifier(),
         # Note: the estimator class itself is passed here, not an instance.
         SklearnClassifier(GaussianProcessClassifier, classes=self.classes),
         PWC(classes=self.classes),
     )
     for bad_ensemble in unsupported:
         self.assertRaises(TypeError,
                           selector.query,
                           X_cand=self.X_cand,
                           X=self.X,
                           y=self.y,
                           ensemble=bad_ensemble)
Пример #14
0
 def test_query_param_clf(self):
     """``FourDS.query`` must raise ``TypeError`` for unsupported ``clf``.

     Both ``None`` and a wrapped Gaussian process classifier are tried,
     each with a freshly created strategy object.
     """
     unsupported = (None, SklearnClassifier(GaussianProcessClassifier()))
     for bad_clf in unsupported:
         strategy = FourDS()
         self.assertRaises(TypeError,
                           strategy.query,
                           X_cand=self.X,
                           clf=bad_clf,
                           X=self.X,
                           y=self.y)
 def test_predict(self):
     """Compare ``SklearnClassifier.predict`` with the bare estimator.

     Checks the ``NotFittedError`` before fitting, agreement with a
     directly fitted ``GaussianProcessClassifier`` after fitting on
     ``self.y1``, and -- after refitting on ``self.y2`` -- that exactly
     one warning is emitted and ``'tokyo'`` is predicted for every sample.
     """
     clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                             missing_label='nan')
     self.assertRaises(NotFittedError, clf.predict, X=self.X)

     clf.fit(X=self.X, y=self.y1)
     predicted = clf.predict(X=self.X)
     # Reference: the plain scikit-learn estimator fitted on three samples.
     reference = GaussianProcessClassifier().fit(
         X=np.zeros((3, 1)), y=['tokyo', 'paris', 'tokyo'])
     np.testing.assert_array_equal(predicted, reference.predict(X=self.X))
     np.testing.assert_array_equal(clf.classes_, reference.classes_)

     clf.fit(X=self.X, y=self.y2)
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         predicted = clf.predict(X=self.X)
         self.assertEqual(len(caught), 1)
     np.testing.assert_array_equal(['tokyo'] * len(self.X), predicted)
 def test_init_param_estimators(self):
     """``fit`` must reject invalid ``estimators`` configurations.

     The constructor stores the argument as given; every configuration
     listed below is then expected to make ``fit`` raise the paired
     exception type.
     """
     ensemble = MultiAnnotEnsemble(estimators='Test')
     self.assertEqual(ensemble.estimators, 'Test')
     self.assertRaises(ValueError, ensemble.fit, X=self.X, y=self.y)

     # (expected exception, constructor keyword arguments)
     failing_configs = [
         (ValueError, dict(estimators=None)),
         (TypeError, dict(estimators=[('GNB', GaussianNB())])),
         (TypeError, dict(estimators=[('PWC', PWC(missing_label=0))])),
         (TypeError, dict(estimators=[('PWC', PWC(missing_label='a'))])),
         (ValueError, dict(classes=[0, 1],
                           estimators=[('PWC', PWC(classes=[0, 2]))])),
         (ValueError, dict(estimators=[('PWC', PWC(classes=[0, 1]))])),
         (ValueError, dict(
             estimators=[('perc', SklearnClassifier(Perceptron()))],
             voting='soft')),
     ]
     for expected_error, init_kwargs in failing_configs:
         ensemble = MultiAnnotEnsemble(**init_kwargs)
         self.assertRaises(expected_error,
                           ensemble.fit,
                           X=self.X,
                           y=self.y)
 def test_predict(self):
     """Check ``MultiAnnotEnsemble.predict`` for soft and hard voting.

     ``predict`` must raise ``NotFittedError`` before fitting.  After
     fitting (soft voting, hard voting, and hard voting with uniform
     sample weights) one prediction per sample is expected and the
     accuracy against ``self.y_true`` is compared with 0.8.
     """
     pwc = PWC(random_state=0)
     gnb = SklearnClassifier(GaussianNB(), random_state=0)
     clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                              voting='soft',
                              random_state=0)
     self.assertRaises(NotFittedError, clf.predict, X=self.X)

     # NOTE(review): the original used ``assertTrue(score, 0.8)``, which
     # treats 0.8 as the failure *message* and only checks that the score
     # is non-zero.  A lower bound of 0.8 is presumably what was intended
     # -- confirm the threshold against the fixture data.
     clf.fit(X=self.X, y=self.y)
     y_pred_soft = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_soft), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)

     clf.voting = 'hard'
     clf.fit(X=self.X, y=self.y)
     y_pred_hard = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_hard), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)

     # Same again with explicit, uniform sample weights.
     clf.fit(X=self.X, y=self.y, sample_weight=np.ones_like(self.y))
     y_pred_hard = clf.predict(X=self.X)
     self.assertEqual(len(y_pred_hard), len(self.X))
     self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)
 def test_partial_fit(self):
     """Check ``SklearnClassifier.partial_fit`` with a ``GaussianNB``.

     Verifies that the classifier is unfitted at first, and that after
     each incremental fit it reports being fitted.  After the first one
     the wrapped estimator's ``class_count_`` is reachable through the
     wrapper, the classes are sorted and the missing label is unchanged.
     """
     clf = SklearnClassifier(estimator=GaussianNB(),
                             classes=['tokyo', 'paris', 'new york'],
                             missing_label='nan')
     self.assertRaises(NotFittedError, check_is_fitted, estimator=clf)

     # First incremental fit.
     clf.partial_fit(self.X, self.y1)
     self.assertTrue(clf.is_fitted_)
     self.assertTrue(hasattr(clf, 'class_count_'))
     expected_classes = ['new york', 'paris', 'tokyo']
     np.testing.assert_array_equal(clf.classes_, expected_classes)
     self.assertEqual(clf.missing_label, 'nan')

     # Second incremental fit, this time with uniform sample weights.
     uniform_weights = np.ones_like(self.y2)
     clf.partial_fit(self.X, self.y2, sample_weight=uniform_weights)
     self.assertTrue(clf.is_fitted_)
     self.assertFalse(hasattr(clf, "kernel_"))
     self.assertTrue(hasattr(clf, 'partial_fit'))
Пример #19
0
    def test_query(self):
        """Exercise ``VOI.query`` and check utilities for a constant model.

        Covers the ``return_utilities`` switch and ``batch_size``, runs the
        query with a partially fitted Gaussian naive Bayes wrapper, and
        then uses a dummy classifier that always predicts 0.5 for every
        class to compare the returned utilities with the negated labeling
        cost for four different shapes of ``labeling_cost``.
        """
        classes = [0, 1]
        X_cand = np.array([[8, 1], [9, 1]])
        X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
        y = np.array([MISSING_LABEL, 0, 1, MISSING_LABEL])
        cost_matrix = 1 - np.eye(2)
        clf_partial = SklearnClassifier(GaussianNB(),
                                        classes=classes).fit(X, y)
        # NOTE(review): this classifier is created but never used below.
        clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                random_state=self.random_state,
                                classes=classes)
        qs = VOI()

        # return_utilities
        with_utilities = list(qs.query(**self.kwargs, return_utilities=True))
        self.assertEqual(len(with_utilities), 2)
        without_utilities = list(
            qs.query(**self.kwargs, return_utilities=False))
        self.assertEqual(len(without_utilities), 1)

        # batch_size
        batch_size = 2
        batch = qs.query(**self.kwargs, batch_size=batch_size)
        self.assertEqual(batch_size, len(batch))

        # query: once with the reused strategy, once with a fresh one
        qs.query(X_cand=X_cand, clf=clf_partial, X=X, y=y)
        qs = VOI()
        qs.query(X_cand=X_cand, clf=clf_partial, X=X, y=y)

        class DummyClf(SkactivemlClassifier):
            """Classifier that predicts 0.5 for every class and sample."""

            def fit(self, X, y, sample_weight=None):
                self.classes_ = np.unique(y[labeled_indices(y)])
                return self

            def predict_proba(self, X):
                return np.full(shape=(len(X), len(self.classes_)),
                               fill_value=0.5)

        # (labeling_cost, expected utilities of the first batch row)
        scalar_cost = 2.345
        vector_cost = np.array([2.346, 6.234])
        row_cost = np.array([[2.346, 6.234]])
        matrix_cost = np.array([[2.346, 6.234], [3.876, 3.568]])
        cases = [
            (scalar_cost, [-scalar_cost, -scalar_cost]),
            (vector_cost, -vector_cost),
            (row_cost, [-row_cost.mean(), -row_cost.mean()]),
            (matrix_cost, -matrix_cost.mean(axis=1)),
        ]
        for labeling_cost, expected in cases:
            qs = VOI(cost_matrix=cost_matrix, labeling_cost=labeling_cost)
            _, utils = qs.query(X_cand=X_cand,
                                clf=DummyClf(),
                                X=X,
                                y=y,
                                return_utilities=True)
            np.testing.assert_array_equal(utils[0], expected)
 def test_predict_proba(self):
     """Compare ``SklearnClassifier.predict_proba`` with the bare estimator.

     Checks the ``NotFittedError`` before fitting, equality with a
     directly fitted ``GaussianProcessClassifier``, the single-column
     output (plus exactly one warning) after fitting on ``self.y2``, the
     uniform output after fitting on ``self.y_nan`` with three declared
     classes, and the zero-padded first column after fitting on
     ``self.y1`` with three declared classes.
     """
     clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                             missing_label='nan')
     self.assertRaises(NotFittedError, clf.predict_proba, X=self.X)

     clf.fit(X=self.X, y=self.y1)
     proba = clf.predict_proba(X=self.X)
     # Reference: the plain scikit-learn estimator fitted on three samples.
     reference = GaussianProcessClassifier().fit(
         X=np.zeros((3, 1)), y=['tokyo', 'paris', 'tokyo'])
     reference_proba = reference.predict_proba(X=self.X)
     np.testing.assert_array_equal(reference_proba, proba)
     np.testing.assert_array_equal(clf.classes_, reference.classes_)

     clf.fit(X=self.X, y=self.y2)
     with warnings.catch_warnings(record=True) as caught:
         warnings.simplefilter("always")
         proba = clf.predict_proba(X=self.X)
         self.assertEqual(len(caught), 1)
     np.testing.assert_array_equal(np.ones((len(self.X), 1)), proba)

     # Three classes are declared up front from here on.
     clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                             classes=['ny', 'paris', 'tokyo'],
                             missing_label='nan')
     clf.fit(X=self.X, y=self.y_nan)
     proba = clf.predict_proba(X=self.X)
     uniform = np.ones((len(self.X), 3)) / 3
     np.testing.assert_array_equal(uniform, proba)

     clf.fit(X=self.X, y=self.y1)
     proba = clf.predict_proba(X=self.X)
     # The first column is expected to stay zero; the other two columns
     # must match the reference estimator.
     padded = np.zeros((len(self.X), 3))
     padded[:, 1:] = reference.predict_proba(X=self.X)
     np.testing.assert_array_equal(padded, proba)
    def test_fit(self):
        """Check ``SklearnClassifier.fit`` in several configurations.

        Covers: parameters visible before fitting, a ``ValueError`` when a
        ``Perceptron`` is combined with a cost matrix, the unfitted state
        before ``fit``, the fitted state after fitting on ``self.y1``, the
        state after refitting on ``self.y2`` (exactly one warning, not
        fitted, no ``kernel_``), and fitting a bagging ensemble built
        around a wrapped classifier.
        """
        clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                missing_label='nan',
                                classes=['tokyo', 'paris'],
                                random_state=0)
        np.testing.assert_array_equal(['tokyo', 'paris'], clf.classes)
        # Attributes of the wrapped estimator are reachable on the wrapper.
        self.assertEqual(clf.kernel, clf.estimator.kernel)
        self.assertFalse(hasattr(clf, 'kernel_'))

        clf = SklearnClassifier(estimator=Perceptron(),
                                missing_label='nan',
                                cost_matrix=1 - np.eye(2),
                                classes=['tokyo', 'paris'],
                                random_state=0)
        self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y1)

        clf = SklearnClassifier(estimator=GaussianProcessClassifier())
        self.assertRaises(NotFittedError, check_is_fitted, estimator=clf)

        clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                classes=['tokyo', 'paris', 'new york'],
                                missing_label='nan')
        self.assertRaises(NotFittedError, check_is_fitted, estimator=clf)
        clf.fit(self.X, self.y1)
        self.assertTrue(clf.is_fitted_)
        self.assertTrue(hasattr(clf, 'kernel_'))
        np.testing.assert_array_equal(clf.classes_,
                                      ['new york', 'paris', 'tokyo'])
        self.assertEqual(clf.missing_label, 'nan')

        # Refitting on ``self.y2`` is expected to warn exactly once and to
        # leave the classifier flagged as not fitted.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            clf.fit(self.X, self.y2)
            self.assertEqual(len(w), 1)
        self.assertFalse(clf.is_fitted_)
        self.assertFalse(hasattr(clf, "kernel_"))
        self.assertFalse(hasattr(clf, 'partial_fit'))

        X = [[1], [0]]
        y_true = [1, 0]
        clf = SklearnClassifier(GaussianProcessClassifier(), classes=[0, 1])
        ensemble = SklearnClassifier(BaggingClassifier(clf), classes=[0, 1])
        ensemble.fit(X, y_true)
        # The original passed ``True`` as a second argument, which
        # ``assertTrue`` interprets as the failure message, not as an
        # expected value.
        self.assertTrue(ensemble.is_fitted_)