def test_query(self):
    """Query `QBC` with every ensemble variant and every method.

    Each combination must return index and utility results whose
    length is one.
    """
    # Three separately constructed classifiers, used as a plain list.
    committee = [
        SklearnClassifier(classes=self.classes,
                          estimator=GaussianProcessClassifier())
        for _ in range(3)
    ]
    bagging_base = PWC(classes=self.classes)
    bagging = SklearnClassifier(
        estimator=BaggingClassifier(base_estimator=bagging_base),
        classes=self.classes)
    voting = SklearnClassifier(
        VotingClassifier(estimators=committee, voting='soft'))

    ensembles = (self.ensemble, committee, bagging, voting)
    methods = ('KL_divergence', 'vote_entropy')
    for ensemble in ensembles:
        for method in methods:
            qs = QBC(method=method)
            result = qs.query(X_cand=self.X_cand, ensemble=ensemble,
                              X=self.X, y=self.y, return_utilities=True)
            idx, u = result
            self.assertEqual(len(idx), 1)
            self.assertEqual(len(u), 1)
def test_init_param_estimator(self):
    """Check handling of the `estimator` init parameter.

    The value passed at construction is exposed unchanged as
    `clf.estimator`, and fitting a `SklearnClassifier` that wraps a
    `GaussianProcessRegressor` must raise `TypeError`.
    """
    # The constructor stores the parameter as given (checked once; the
    # identical second construct-and-assert pair was redundant).
    clf = SklearnClassifier(estimator='Test')
    self.assertEqual(clf.estimator, 'Test')

    clf = SklearnClassifier(missing_label='nan',
                            estimator=GaussianProcessRegressor())
    self.assertRaises(TypeError, clf.fit, X=self.X, y=self.y1)
def test_fit(self):
    """Check `classes` handling and a failing fit of `MultiAnnotEnsemble`."""
    # The ensemble's `classes` parameter matches the one given to the
    # individual classifiers.
    kernel_clf = PWC(classes=[1, 2])
    bayes_clf = SklearnClassifier(GaussianNB(), classes=[1, 2])
    ensemble = MultiAnnotEnsemble(estimators=[('PWC', kernel_clf)],
                                  classes=[1, 2])
    for member in (bayes_clf, kernel_clf):
        np.testing.assert_array_equal(ensemble.classes, member.classes)

    # Fitting with only the first column of `self.y` must fail.
    kernel_clf = PWC(classes=np.arange(3))
    bayes_clf = SklearnClassifier(GaussianNB(), classes=np.arange(3))
    members = [('PWC', kernel_clf), ('GNB', bayes_clf)]
    ensemble = MultiAnnotEnsemble(estimators=members, voting='soft',
                                  classes=np.arange(3))
    first_column = self.y[:, 0]
    with self.assertRaises(ValueError):
        ensemble.fit(X=self.X, y=first_column)
def test_epistemic_uncertainty_logreg(self):
    """Exercise `_epistemic_uncertainty_logreg` with invalid and valid input."""
    # Logistic regression configured with three classes -> ValueError.
    three_class_logreg = SklearnClassifier(LogisticRegression(),
                                           classes=[0, 1, 2],
                                           random_state=self.random_state)
    self.assertRaises(ValueError, _epistemic_uncertainty_logreg,
                      **self.kwargs, clf=three_class_logreg)

    # A wrapped decision tree and the fixture classifier -> TypeError.
    tree_clf = SklearnClassifier(DecisionTreeClassifier(), classes=[0, 1],
                                 random_state=self.random_state)
    for wrong_clf in (tree_clf, self.clf):
        self.assertRaises(TypeError, _epistemic_uncertainty_logreg,
                          **self.kwargs, clf=wrong_clf)

    # Call with a fitted two-class logistic regression.
    # NOTE(review): the returned utilities are not checked against any
    # expected value; this only verifies that the call completes.
    probas = np.array([[0.5, 0.5]])
    X = np.array([[0]])
    X_cand = np.array([[3]])
    y = np.array([0])
    logreg = SklearnClassifier(LogisticRegression(), classes=[0, 1])
    logreg.fit(X, y)
    _epistemic_uncertainty_logreg(X_cand, X, y, logreg, probas)
def setUp(self):
    """Create the data, classifiers and query-strategy registry for the tests.

    Sets `X`, `y_true`, `y`, `y_missing_label`, `budget`, `clf`, `cmm`,
    `ensemble` and `query_strategies` on the test instance.
    """
    self.MISSING_LABEL = MISSING_LABEL
    self.X, self.y_true = make_blobs(n_samples=10, n_features=2,
                                     centers=2, cluster_std=1,
                                     random_state=1)
    self.budget = 5

    classes = np.unique(self.y_true)
    self.clf = PWC(classes=classes, missing_label=MISSING_LABEL,
                   random_state=0)
    self.cmm = CMM(classes=classes, missing_label=MISSING_LABEL,
                   random_state=0)
    self.ensemble = SklearnClassifier(
        classes=classes,
        missing_label=MISSING_LABEL,
        estimator=RandomForestClassifier(random_state=0),
        random_state=0)

    self.y_missing_label = np.full(self.y_true.shape, self.MISSING_LABEL)
    # `self.y` is a full copy of the true labels; re-assigning its first
    # three entries from `self.y_true` would change nothing, so that
    # no-op has been dropped.
    self.y = self.y_true.copy()

    # Collect every class exported by `pool` that derives from
    # `SingleAnnotPoolBasedQueryStrategy`, keyed by its exported name.
    # (No debug printing here: setUp runs before every single test.)
    self.query_strategies = {}
    for qs_name in pool.__all__:
        qs = getattr(pool, qs_name)
        if inspect.isclass(qs) and \
                issubclass(qs, SingleAnnotPoolBasedQueryStrategy):
            self.query_strategies[qs_name] = qs
def test_init_param_voting(self):
    """Fitting with an invalid `voting` value must raise `ValueError`."""
    members = [('pwc', PWC()), ('gnb', SklearnClassifier(GaussianNB()))]
    # Both a wrong string and a non-string value are rejected at fit.
    for invalid_voting in ('Test', 1):
        ensemble = MultiAnnotEnsemble(estimators=members,
                                      voting=invalid_voting)
        self.assertRaises(ValueError, ensemble.fit, X=self.X, y=self.y)
def test_query_param_clf(self):
    """`EpistemicUncertainty.query` must raise `TypeError` for these `clf` values."""
    qs = EpistemicUncertainty()
    tree_clf = SklearnClassifier(DecisionTreeClassifier())
    invalid_clfs = (None, 'string', 1, tree_clf)
    for invalid in invalid_clfs:
        self.assertRaises(TypeError, qs.query, **self.kwargs, clf=invalid)
def setUp(self):
    """Set up candidates, training data, labels and a random-forest ensemble."""
    self.random_state = 41
    self.classes = [0, 1]

    # Candidate samples to choose from.
    self.X_cand = [
        [8, 1, 6, 8],
        [9, 1, 6, 5],
        [5, 1, 6, 5],
    ]
    # Training samples and their labels.
    self.X = [
        [1, 2, 5, 9],
        [5, 8, 4, 6],
        [8, 4, 5, 9],
        [5, 4, 8, 5],
    ]
    self.y = [0., 0., 1., 1.]

    forest = RandomForestClassifier(random_state=0)
    self.ensemble = SklearnClassifier(estimator=forest,
                                      classes=self.classes,
                                      random_state=self.random_state)
def test_query(self):
    """Check return values of `EpistemicUncertainty.query`.

    Covers the `return_utilities` flag, `batch_size`, queries with a
    `PWC` and a logistic-regression classifier, and the effect of
    `sample_weight` on the utilities.
    """
    qs = EpistemicUncertainty()

    # return_utilities
    with_utils = list(
        qs.query(**self.kwargs, clf=self.clf, return_utilities=True))
    self.assertEqual(len(with_utils), 2)
    without_utils = list(
        qs.query(**self.kwargs, clf=self.clf, return_utilities=False))
    self.assertEqual(len(without_utils), 1)

    # batch_size
    batch = 3
    qs = EpistemicUncertainty()
    chosen = qs.query(**self.kwargs, clf=self.clf, batch_size=batch)
    self.assertEqual(batch, len(chosen))

    def run_and_check_shapes(clf):
        # Query with complete and missing-label fixtures, then verify
        # the shapes of indices and utilities.
        strategy = EpistemicUncertainty()
        strategy.query(**self.kwargs, clf=clf)
        strategy.query(**self.kwargs_MISSING_LABEL, clf=clf)
        idx, utils = strategy.query(**self.kwargs, clf=clf,
                                    return_utilities=True)
        self.assertEqual(utils.shape, (1, len(self.X_cand)))
        self.assertEqual(idx.shape, (1, ))
        return strategy, utils

    # query - PWC
    pwc = PWC(classes=self.classes, random_state=self.random_state)
    run_and_check_shapes(pwc)

    # query - logistic regression
    logreg = SklearnClassifier(LogisticRegression(),
                               classes=self.classes,
                               random_state=self.random_state)
    qs, utilities = run_and_check_shapes(logreg)

    # Non-uniform sample weights must change at least one utility.
    _, weighted_utilities = qs.query(**self.kwargs, clf=logreg,
                                     return_utilities=True,
                                     sample_weight=[0.5, 1, 1, 1])
    unchanged = weighted_utilities == utilities
    self.assertFalse(unchanged.all())
def test_predict_proba(self):
    """Predicted probabilities must sum to one per sample for both votings."""
    members = [('PWC', PWC()), ('GNB', SklearnClassifier(GaussianNB()))]
    ensemble = MultiAnnotEnsemble(estimators=members, voting='soft')

    # Predicting before fitting is an error.
    with self.assertRaises(NotFittedError):
        ensemble.predict_proba(X=self.X)

    def fit_and_check_row_sums():
        ensemble.fit(X=self.X, y=self.y)
        proba = ensemble.predict_proba(X=self.X)
        np.testing.assert_allclose(np.ones(len(proba)), proba.sum(axis=1))

    # Soft voting (as constructed), then hard voting.
    fit_and_check_row_sums()
    ensemble.voting = 'hard'
    fit_and_check_row_sums()
def setUp(self):
    """Set up data, a Gaussian-process classifier and default query kwargs."""
    self.random_state = 1
    self.classes = [0, 1]
    self.cost_matrix = np.eye(2)

    # Candidates, training samples and labels.
    self.X_cand = np.array([[8, 1], [9, 1], [5, 1]])
    self.X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
    self.y = np.array([0, 0, 1, 1])

    self.clf = SklearnClassifier(GaussianProcessClassifier(),
                                 classes=self.classes)
    # Keyword arguments shared by the query calls in the tests.
    self.kwargs = {
        'X_cand': self.X_cand,
        'clf': self.clf,
        'X': self.X,
        'y': self.y,
    }
def test_query(self):
    """Check `UncertaintySampling.query` across its methods.

    Covers the `return_utilities` flag, `batch_size`, agreement of the
    selections made by 'entropy', 'margin_sampling' and
    'least_confident', cost-matrix variants, and the output shapes for
    'expected_average_precision'.
    """
    selections = []
    gp_clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                               random_state=self.random_state,
                               classes=self.classes)

    def query_single_unlabeled(strategy):
        # Query with one candidate and one sample whose label is missing.
        strategy.query(X_cand=[[1]], clf=gp_clf, X=[[1]],
                       y=[MISSING_LABEL])

    qs = UncertaintySampling()

    # return_utilities
    with_utils = list(qs.query(**self.kwargs, return_utilities=True))
    self.assertEqual(len(with_utils), 2)
    without_utils = list(qs.query(**self.kwargs, return_utilities=False))
    self.assertEqual(len(without_utils), 1)

    # batch_size
    batch = 3
    qs = UncertaintySampling()
    chosen = qs.query(**self.kwargs, batch_size=batch)
    self.assertEqual(batch, len(chosen))

    # query
    for method in ('entropy', 'margin_sampling', 'least_confident'):
        qs = UncertaintySampling(method=method)
        query_single_unlabeled(qs)
        selections.append(qs.query(**self.kwargs))

    for method in ('margin_sampling', 'least_confident'):
        qs = UncertaintySampling(method=method,
                                 cost_matrix=[[0, 1], [1, 0]])
        query_single_unlabeled(qs)

    # All three methods must have selected the same candidate.
    reference = selections[0]
    for selection in selections:
        self.assertEqual(reference, selection)

    qs = UncertaintySampling(method='expected_average_precision')
    query_single_unlabeled(qs)
    best_indices, utilities = qs.query(**self.kwargs,
                                       return_utilities=True)
    self.assertEqual(utilities.shape, (1, len(self.X_cand)))
    self.assertEqual(best_indices.shape, (1, ))
def test_query_param_ensemble(self):
    """`QBC.query` must raise `TypeError` for each of these `ensemble` values."""
    qs = QBC()
    invalid_ensembles = (
        None,
        'test',
        1,
        GaussianProcessClassifier(),
        # The estimator class itself is passed here, not an instance.
        SklearnClassifier(GaussianProcessClassifier, classes=self.classes),
        PWC(classes=self.classes),
    )
    for invalid in invalid_ensembles:
        with self.assertRaises(TypeError):
            qs.query(X_cand=self.X_cand, X=self.X, y=self.y,
                     ensemble=invalid)
def test_query_param_clf(self):
    """`FourDS.query` must raise `TypeError` for `None` and a wrapped GP classifier."""
    qs = FourDS()
    with self.assertRaises(TypeError):
        qs.query(X_cand=self.X, clf=None, X=self.X, y=self.y)

    wrapped_gp = SklearnClassifier(GaussianProcessClassifier())
    qs = FourDS()
    with self.assertRaises(TypeError):
        qs.query(X_cand=self.X, clf=wrapped_gp, X=self.X, y=self.y)
def test_predict(self):
    """Compare `SklearnClassifier.predict` with a directly fitted estimator.

    Also checks that predicting after fitting on `self.y2` emits exactly
    one warning and returns 'tokyo' for every sample.
    """
    wrapped = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                missing_label='nan')
    with self.assertRaises(NotFittedError):
        wrapped.predict(X=self.X)

    # Predictions and classes must match the reference estimator.
    wrapped.fit(X=self.X, y=self.y1)
    predicted = wrapped.predict(X=self.X)
    reference = GaussianProcessClassifier().fit(
        X=np.zeros((3, 1)), y=['tokyo', 'paris', 'tokyo'])
    expected = reference.predict(X=self.X)
    np.testing.assert_array_equal(predicted, expected)
    np.testing.assert_array_equal(wrapped.classes_, reference.classes_)

    # After fitting on `self.y2`, predicting warns once.
    wrapped.fit(X=self.X, y=self.y2)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        predicted = wrapped.predict(X=self.X)
    self.assertEqual(len(caught), 1)
    expected = ['tokyo'] * len(self.X)
    np.testing.assert_array_equal(expected, predicted)
def test_init_param_estimators(self):
    """Invalid `estimators` values must make `MultiAnnotEnsemble.fit` raise."""

    def assert_fit_raises(exc_type, ensemble):
        self.assertRaises(exc_type, ensemble.fit, X=self.X, y=self.y)

    # The parameter is stored as given; the error only shows up at fit.
    ensemble = MultiAnnotEnsemble(estimators='Test')
    self.assertEqual(ensemble.estimators, 'Test')
    assert_fit_raises(ValueError, ensemble)

    assert_fit_raises(ValueError, MultiAnnotEnsemble(estimators=None))

    # A bare scikit-learn estimator that is not wrapped.
    assert_fit_raises(
        TypeError,
        MultiAnnotEnsemble(estimators=[('GNB', GaussianNB())]))

    # Members constructed with `missing_label` 0 and 'a'.
    for label in (0, 'a'):
        assert_fit_raises(
            TypeError,
            MultiAnnotEnsemble(
                estimators=[('PWC', PWC(missing_label=label))]))

    # Member `classes` differing from the ensemble's `classes`.
    assert_fit_raises(
        ValueError,
        MultiAnnotEnsemble(classes=[0, 1],
                           estimators=[('PWC', PWC(classes=[0, 2]))]))
    assert_fit_raises(
        ValueError,
        MultiAnnotEnsemble(estimators=[('PWC', PWC(classes=[0, 1]))]))

    # A wrapped Perceptron combined with soft voting.
    perceptron = SklearnClassifier(Perceptron())
    assert_fit_raises(
        ValueError,
        MultiAnnotEnsemble(estimators=[('perc', perceptron)],
                           voting='soft'))
def test_predict(self):
    """Check `MultiAnnotEnsemble.predict` for soft and hard voting.

    For each configuration the number of predictions must equal the
    number of samples and the score on `self.y_true` must be at least
    0.8.
    """
    pwc = PWC(random_state=0)
    gnb = SklearnClassifier(GaussianNB(), random_state=0)
    clf = MultiAnnotEnsemble(estimators=[('PWC', pwc), ('GNB', gnb)],
                             voting='soft', random_state=0)
    self.assertRaises(NotFittedError, clf.predict, X=self.X)

    def check_predictions():
        y_pred = clf.predict(X=self.X)
        self.assertEqual(len(y_pred), len(self.X))
        # `assertTrue(score, 0.8)` would treat 0.8 as the failure
        # message and only test truthiness; compare to the threshold.
        self.assertGreaterEqual(clf.score(self.X, self.y_true), 0.8)

    # Soft voting.
    clf.fit(X=self.X, y=self.y)
    check_predictions()

    # Hard voting.
    clf.voting = 'hard'
    clf.fit(X=self.X, y=self.y)
    check_predictions()

    # Hard voting with uniform sample weights.
    clf.fit(X=self.X, y=self.y, sample_weight=np.ones_like(self.y))
    check_predictions()
def test_partial_fit(self):
    """Check the state of `SklearnClassifier` after `partial_fit` calls."""
    wrapped = SklearnClassifier(estimator=GaussianNB(),
                                classes=['tokyo', 'paris', 'new york'],
                                missing_label='nan')
    with self.assertRaises(NotFittedError):
        check_is_fitted(estimator=wrapped)

    # First incremental fit.
    wrapped.partial_fit(self.X, self.y1)
    self.assertTrue(wrapped.is_fitted_)
    self.assertTrue(hasattr(wrapped, 'class_count_'))
    expected_classes = ['new york', 'paris', 'tokyo']
    np.testing.assert_array_equal(wrapped.classes_, expected_classes)
    self.assertEqual(wrapped.missing_label, 'nan')

    # Second incremental fit, now with uniform sample weights.
    weights = np.ones_like(self.y2)
    wrapped.partial_fit(self.X, self.y2, sample_weight=weights)
    self.assertTrue(wrapped.is_fitted_)
    self.assertFalse(hasattr(wrapped, "kernel_"))
    self.assertTrue(hasattr(wrapped, 'partial_fit'))
def test_query(self):
    """Check `VOI.query` outputs, including utilities under labeling costs.

    With a dummy classifier that predicts 0.5 for every class, the
    first row of utilities must equal the negated labeling cost for
    each supported form of `labeling_cost`.
    """
    classes = [0, 1]
    X_cand = np.array([[8, 1], [9, 1]])
    X = np.array([[1, 2], [5, 8], [8, 4], [5, 4]])
    y = np.array([MISSING_LABEL, 0, 1, MISSING_LABEL])
    cost_matrix = 1 - np.eye(2)
    nb_clf = SklearnClassifier(GaussianNB(), classes=classes).fit(X, y)
    # Constructed here but not passed to any query below.
    gp_clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                               random_state=self.random_state,
                               classes=classes)
    qs = VOI()

    # return_utilities
    with_utils = list(qs.query(**self.kwargs, return_utilities=True))
    self.assertEqual(len(with_utils), 2)
    without_utils = list(qs.query(**self.kwargs, return_utilities=False))
    self.assertEqual(len(without_utils), 1)

    # batch_size
    batch = 2
    chosen = qs.query(**self.kwargs, batch_size=batch)
    self.assertEqual(batch, len(chosen))

    # query
    qs.query(X_cand=X_cand, clf=nb_clf, X=X, y=y)
    qs = VOI()
    qs.query(X_cand=X_cand, clf=nb_clf, X=X, y=y)

    class DummyClf(SkactivemlClassifier):
        """Classifier that predicts probability 0.5 for every class."""

        def fit(self, X, y, sample_weight=None):
            self.classes_ = np.unique(y[labeled_indices(y)])
            return self

        def predict_proba(self, X):
            return np.full(shape=(len(X), len(self.classes_)),
                           fill_value=0.5)

    def first_row_utilities(cost):
        # Query with the dummy classifier and the given labeling cost.
        strategy = VOI(cost_matrix=cost_matrix, labeling_cost=cost)
        _, utils = strategy.query(X_cand=X_cand, clf=DummyClf(), X=X,
                                  y=y, return_utilities=True)
        return utils[0]

    # Scalar cost.
    scalar_cost = 2.345
    np.testing.assert_array_equal(first_row_utilities(scalar_cost),
                                  [-scalar_cost, -scalar_cost])

    # One-dimensional cost array with two entries.
    vector_cost = np.array([2.346, 6.234])
    np.testing.assert_array_equal(first_row_utilities(vector_cost),
                                  -vector_cost)

    # Cost array of shape (1, 2): the expected utility is its negated mean.
    row_cost = np.array([[2.346, 6.234]])
    expected = [-row_cost.mean(), -row_cost.mean()]
    np.testing.assert_array_equal(first_row_utilities(row_cost), expected)

    # Cost array of shape (2, 2): expected utilities are negated row means.
    matrix_cost = np.array([[2.346, 6.234], [3.876, 3.568]])
    expected = -matrix_cost.mean(axis=1)
    np.testing.assert_array_equal(first_row_utilities(matrix_cost),
                                  expected)
def test_predict_proba(self):
    """Compare `SklearnClassifier.predict_proba` with expected probabilities."""
    n_samples = len(self.X)
    wrapped = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                missing_label='nan')
    with self.assertRaises(NotFittedError):
        wrapped.predict_proba(X=self.X)

    # Without `classes`, the output equals the reference estimator's.
    wrapped.fit(X=self.X, y=self.y1)
    proba = wrapped.predict_proba(X=self.X)
    reference = GaussianProcessClassifier().fit(
        X=np.zeros((3, 1)), y=['tokyo', 'paris', 'tokyo'])
    expected = reference.predict_proba(X=self.X)
    np.testing.assert_array_equal(expected, proba)
    np.testing.assert_array_equal(wrapped.classes_, reference.classes_)

    # After fitting on `self.y2`: one warning and a single column of ones.
    wrapped.fit(X=self.X, y=self.y2)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        proba = wrapped.predict_proba(X=self.X)
    self.assertEqual(len(caught), 1)
    expected = np.ones((n_samples, 1))
    np.testing.assert_array_equal(expected, proba)

    # With three given classes and `self.y_nan`: uniform probabilities.
    wrapped = SklearnClassifier(estimator=GaussianProcessClassifier(),
                                classes=['ny', 'paris', 'tokyo'],
                                missing_label='nan')
    wrapped.fit(X=self.X, y=self.y_nan)
    proba = wrapped.predict_proba(X=self.X)
    expected = np.ones((n_samples, 3)) / 3
    np.testing.assert_array_equal(expected, proba)

    # With `self.y1`: the first column is zero, the remaining columns
    # equal the reference estimator's probabilities.
    wrapped.fit(X=self.X, y=self.y1)
    proba = wrapped.predict_proba(X=self.X)
    expected = np.zeros((n_samples, 3))
    expected[:, 1:] = reference.predict_proba(X=self.X)
    np.testing.assert_array_equal(expected, proba)
def test_fit(self):
    """Check parameter handling and fitted state of `SklearnClassifier.fit`."""
    # Init parameters are stored as given, and attributes of the wrapped
    # estimator are reachable through the wrapper.
    clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                            missing_label='nan',
                            classes=['tokyo', 'paris'],
                            random_state=0)
    np.testing.assert_array_equal(['tokyo', 'paris'], clf.classes)
    self.assertEqual(clf.kernel, clf.estimator.kernel)
    self.assertFalse(hasattr(clf, 'kernel_'))

    # A Perceptron combined with a cost matrix fails at fit.
    clf = SklearnClassifier(estimator=Perceptron(), missing_label='nan',
                            cost_matrix=1 - np.eye(2),
                            classes=['tokyo', 'paris'], random_state=0)
    self.assertRaises(ValueError, clf.fit, X=self.X, y=self.y1)

    # Freshly constructed classifiers are not fitted.
    clf = SklearnClassifier(estimator=GaussianProcessClassifier())
    self.assertRaises(NotFittedError, check_is_fitted, estimator=clf)
    clf = SklearnClassifier(estimator=GaussianProcessClassifier(),
                            classes=['tokyo', 'paris', 'new york'],
                            missing_label='nan')
    self.assertRaises(NotFittedError, check_is_fitted, estimator=clf)

    # Fitting on `self.y1` sets the fitted attributes.
    clf.fit(self.X, self.y1)
    self.assertTrue(clf.is_fitted_)
    self.assertTrue(hasattr(clf, 'kernel_'))
    np.testing.assert_array_equal(clf.classes_,
                                  ['new york', 'paris', 'tokyo'])
    self.assertEqual(clf.missing_label, 'nan')

    # Fitting on `self.y2` warns once and leaves the wrapper unfitted.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        clf.fit(self.X, self.y2)
        self.assertEqual(len(w), 1)
    self.assertFalse(clf.is_fitted_)
    self.assertFalse(hasattr(clf, "kernel_"))
    self.assertFalse(hasattr(clf, 'partial_fit'))

    # A bagging ensemble built on a wrapped classifier can be fitted.
    X = [[1], [0]]
    y_true = [1, 0]
    clf = SklearnClassifier(GaussianProcessClassifier(), classes=[0, 1])
    ensemble = SklearnClassifier(BaggingClassifier(clf), classes=[0, 1])
    ensemble.fit(X, y_true)
    # The second positional argument of `assertTrue` is the failure
    # message, so no expected value is passed here.
    self.assertTrue(ensemble.is_fitted_)