Exemplo n.º 1
0
 def _test_init_param_cost_matrix(self, clf_class):
     """Check storage and validation of the ``cost_matrix`` init parameter.

     The test expects ``cost_matrix`` to default to ``None``, to be stored
     as passed at construction time, and invalid matrices to make ``fit``
     raise ``ValueError``.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     # Default: no cost matrix. ``assertIsNone`` performs an identity check,
     # so an unexpected array value produces a clean assertion failure
     # instead of an ambiguous-truth-value error from ``==``.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators)
     self.assertIsNone(clf_mdl.cost_matrix)

     # The value given at construction is exposed unchanged; no validation
     # is triggered before ``fit``.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         cost_matrix=[1, 2],
                         missing_label=self.missing_label,
                         classes=self.classes)
     np.testing.assert_array_equal(clf_mdl.cost_matrix, [1, 2])

     # A 2x2 matrix must be rejected by ``fit``.
     # NOTE(review): presumably because its shape does not match
     # ``self.classes`` -- confirm against the fixture.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         cost_matrix=1 - np.eye(2),
                         classes=self.classes,
                         missing_label=self.missing_label)
     self.assertRaises(ValueError, clf_mdl.fit, X=self.X, y=self.y)

     # A matrix containing non-numeric strings must be rejected by ``fit``.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         cost_matrix=[['2', '5', '3'], ['a', '5', '3'],
                                      ['a', '5', '3']],
                         classes=self.classes,
                         missing_label=self.missing_label)
     self.assertRaises(ValueError, clf_mdl.fit, X=self.X, y=self.y)
Exemplo n.º 2
0
 def _test_init_param_class_prior(self, clf_class):
     """Exercise the ``class_prior`` init parameter of ``clf_class``.

     The test expects a default of 0, expects an explicitly given value to
     be stored unchanged, and expects invalid priors to be rejected once
     ``fit`` is called.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     # Default value.
     model = call_func(clf_class,
                       estimator=self.estimator,
                       estimators=self.estimators)
     self.assertEqual(model.class_prior, 0)

     # An explicit value is kept as given.
     model = call_func(clf_class,
                       estimator=self.estimator,
                       estimators=self.estimators,
                       class_prior=2)
     self.assertEqual(model.class_prior, 2)

     # Invalid priors paired with the exception ``fit`` has to raise.
     rejected = (
         (-1.0, ValueError),
         (['test'], ValueError),
         ('test', TypeError),
     )
     for prior, expected_error in rejected:
         model = call_func(clf_class,
                           estimator=self.estimator,
                           estimators=self.estimators,
                           missing_label=self.missing_label,
                           class_prior=prior)
         with self.assertRaises(expected_error):
             model.fit(X=self.X, y=self.y)
Exemplo n.º 3
0
 def _test_init_param_classes(self, clf_class):
     """Check storage and validation of the ``classes`` init parameter.

     The test expects ``classes`` to default to ``None``, to be stored as
     passed at construction time, and invalid class lists to be rejected
     when ``fit`` is called.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     # Default: classes unknown. ``assertIsNone`` performs an identity
     # check, so an unexpected array value produces a clean assertion
     # failure instead of an ambiguous-truth-value error from ``==``.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators)
     self.assertIsNone(clf_mdl.classes)

     # The value given at construction is exposed unchanged.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         classes=[0, 1])
     np.testing.assert_array_equal(clf_mdl.classes, [0, 1])

     # A plain string is not a valid collection of classes.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         classes='Test',
                         missing_label=self.missing_label)
     self.assertRaises(ValueError, clf_mdl.fit, X=self.X, y=self.y)

     # NOTE(review): presumably rejected because the labels in ``self.y``
     # are not covered by ``[0, 1]`` -- confirm against the fixture.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         classes=[0, 1],
                         missing_label=self.missing_label)
     self.assertRaises(ValueError, clf_mdl.fit, X=self.X, y=self.y)

     # The missing label itself must not be listed as a class.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         classes=[0, 1, self.missing_label],
                         missing_label=self.missing_label)
     self.assertRaises(TypeError, clf_mdl.fit, X=self.X, y=self.y)
Exemplo n.º 4
0
    def test_call_func(self):
        """Check how ``call_func`` forwards keyword arguments.

        With every argument forwarded the expected product is 2 * 5 * 5.
        With ``only_mandatory=True`` the test expects 12, i.e. 2 * 2 * 3,
        which corresponds to the defaults of ``b`` and ``c`` being used.
        """
        def product(a, b=2, c=3):
            return a * b * c

        kwargs = dict(a=2, b=5, c=5)
        self.assertEqual(call_func(product, **kwargs), 50)
        self.assertEqual(
            call_func(product, only_mandatory=True, **kwargs), 12)
Exemplo n.º 5
0
 def _test_init_param_missing_label(self, clf_class):
     """Check storage and validation of the ``missing_label`` parameter.

     The test expects ``missing_label`` to default to NaN, to be stored as
     passed at construction time, and the string ``'Test'`` to make ``fit``
     raise ``TypeError`` on the fixture data.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     # Default: NaN marks a missing label.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators)
     self.assertTrue(np.isnan(clf_mdl.missing_label))

     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         missing_label='Test')
     # Compare the stored value explicitly. ``assertTrue(value, 'Test')``
     # would treat 'Test' as the failure message and pass for any truthy
     # value.
     self.assertEqual(clf_mdl.missing_label, 'Test')
     self.assertRaises(TypeError, clf_mdl.fit, X=self.X, y=self.y)
Exemplo n.º 6
0
 def _test_init_param_random_state(self, clf_class):
     """Check storage and validation of a classifier's ``random_state``.

     The test expects ``random_state`` to default to ``None``, to be stored
     as passed at construction time, and the string ``'Test'`` to make
     ``fit`` raise ``ValueError``.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators)
     # ``assertIsNone`` reports the offending value on failure, unlike
     # ``assertTrue(x is None)``.
     self.assertIsNone(clf_mdl.random_state)

     # The invalid value is stored untouched and only rejected by ``fit``.
     clf_mdl = call_func(clf_class,
                         estimator=self.estimator,
                         estimators=self.estimators,
                         random_state='Test',
                         missing_label=self.missing_label)
     self.assertEqual(clf_mdl.random_state, 'Test')
     self.assertRaises(ValueError, clf_mdl.fit, X=self.X, y=self.y)
Exemplo n.º 7
0
 def _test_init_param_random_state(self, qs_class, clf):
     """Check storage and validation of a query strategy's ``random_state``.

     The test expects ``random_state`` to default to ``None``, to be stored
     as passed at construction time, and the string ``'Test'`` to make
     ``query`` raise ``ValueError``.

     Parameters
     ----------
     qs_class : callable
         Query strategy class instantiated through ``call_func``.
     clf : object
         Classifier handed to the strategy's constructor and ``query``.
     """
     qs_mdl = call_func(qs_class, classes=np.unique(self.y_true))
     # ``assertIsNone`` reports the offending value on failure, unlike
     # ``assertTrue(x is None)``.
     self.assertIsNone(qs_mdl.random_state)

     # The invalid value is stored untouched and only rejected by ``query``.
     qs_mdl = call_func(qs_class,
                        classes=np.unique(self.y_true),
                        clf=clf,
                        random_state='Test')
     self.assertEqual(qs_mdl.random_state, 'Test')
     self.assertRaises(ValueError,
                       call_func,
                       qs_mdl.query,
                       X_cand=self.X,
                       clf=clf,
                       X=self.X,
                       y=self.y,
                       ensemble=self.ensemble)
Exemplo n.º 8
0
 def _test_fit_param_y(self, clf_class):
     """Verify that ``fit`` rejects label arrays that do not match ``X``.

     Two samples are paired with three labels, once as a flat list and once
     as a column; both calls are expected to raise ``ValueError``.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     model = call_func(clf_class,
                       estimator=self.estimator,
                       estimators=self.estimators,
                       missing_label=self.missing_label)
     samples = [[0], [1]]
     for bad_labels in ([0, 1, 2], [[0], [1], [2]]):
         with self.assertRaises(ValueError):
             model.fit(X=samples, y=bad_labels)
Exemplo n.º 9
0
    def _test_single_classifier(self, clf):
        """Run the shared behaviour checks for one registered classifier.

        Four sub-tests run in sequence on the same classifier instances:
        prediction before fitting, fitting on an empty data set, fitting on
        data with only missing labels, and fitting on the full data set.

        Parameters
        ----------
        clf : hashable
            Key into ``self.classifiers`` selecting the classifier to test;
            also used as the sub-test label.
        """
        n_classes = len(self.classes)

        # Fully parameterised classifier.
        full_clf = call_func(self.classifiers[clf],
                             estimator=self.estimator,
                             estimators=self.estimators,
                             classes=self.classes,
                             cost_matrix=1 - np.eye(n_classes),
                             missing_label=self.missing_label,
                             voting='soft',
                             random_state=0)

        # Identical configuration, but without the classes parameter.
        no_cls_clf = deepcopy(full_clf)
        no_cls_clf.classes = None

        # Predicting before fitting must fail for both variants.
        with self.subTest(msg="Not Fitted Test", clf_name=clf):
            for unfitted in (no_cls_clf, full_clf):
                with self.assertRaises(NotFittedError):
                    unfitted.predict(X=self.X)

        # Fitting on an empty data set yields uniform predictions.
        with self.subTest(msg="Empty Data Test", clf_name=clf):
            with self.assertRaises(ValueError):
                no_cls_clf.predict(X=self.X)
            full_clf.fit(X=[], y=[])
            proba = full_clf.predict_proba(X=self.X)
            np.testing.assert_array_equal(
                proba, np.ones((len(self.X), 3)) / 3)
            if hasattr(full_clf, 'predict_annot_perf'):
                annot_perf = full_clf.predict_annot_perf(X=self.X)
                np.testing.assert_array_equal(
                    annot_perf, np.ones((len(self.X), 1)) / 3)

        # Fitting on data whose labels are all missing.
        with self.subTest(msg="Missing Label Test", clf_name=clf):
            with self.assertRaises(ValueError):
                no_cls_clf.fit(X=self.X, y=self.y_missing_label)
            full_clf.fit(X=self.X, y=self.y_missing_label)
            accuracy = full_clf.score(self.X, self.y_true)
            self.assertTrue(accuracy > 0)
            expected_proba = np.ones((len(self.X), n_classes)) / n_classes
            proba = full_clf.predict_proba(self.X)
            np.testing.assert_array_equal(expected_proba, proba)
            if hasattr(full_clf, 'predict_freq'):
                expected_freq = np.zeros((len(self.X), n_classes))
                freq = full_clf.predict_freq(self.X)
                np.testing.assert_array_equal(expected_freq, freq)

        # Fitting on the full data set must give a useful model.
        with self.subTest(msg="Full Data Test", clf_name=clf):
            accuracy = full_clf.fit(self.X, self.y).score(self.X,
                                                          self.y_true)
            self.assertTrue(accuracy > 0.8)
            if hasattr(full_clf, 'predict_proba'):
                proba = full_clf.predict_proba(self.X)
                # At least one probability has to differ from uniform.
                self.assertTrue(np.sum(proba != 1 / n_classes) > 0)
            if hasattr(full_clf, 'predict_freq'):
                freq = full_clf.predict_freq(self.X)
                self.assertTrue(np.sum(freq) > 0)
Exemplo n.º 10
0
 def _test_predict_freq_param_X(self, clf_class):
     """Verify that ``predict_freq`` rejects malformed ``X`` arguments.

     After fitting on the fixture data, a flat list, a two-row single-column
     list and a list of strings are each expected to raise ``ValueError``.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     model = call_func(clf_class,
                       estimator=self.estimator,
                       estimators=self.estimators,
                       missing_label=self.missing_label)
     model.fit(X=self.X, y=self.y)
     invalid_inputs = ([0, 0], [[0], [0]], [['x', 'y']])
     for bad_X in invalid_inputs:
         with self.assertRaises(ValueError):
             model.predict_freq(X=bad_X)
Exemplo n.º 11
0
    def _test_update_before_query(self, rand_seed, query_strategy_class, clf,
                                  X_init, y_init, X_stream, y_stream,
                                  training_size, qs_name):
        """Stream samples through two identically seeded query strategies.

        The first strategy is queried and updated on every sample; the
        second one is only ever updated, i.e. ``update`` is called on it
        without a preceding ``query``. The method contains no assertions of
        its own, so it passes as long as no call raises.

        Parameters
        ----------
        rand_seed : int or RandomState
            Seed passed to ``check_random_state``.
        query_strategy_class : callable
            Query strategy class; must accept ``random_state``.
        clf : object
            Classifier used for querying and refitted after every sample.
        X_init, y_init : iterable
            Initial training samples and labels.
        X_stream, y_stream : iterable
            Stream of samples and their true labels.
        training_size : int
            Maximum length of the sliding training window.
        qs_name : str
            Not used inside this method.
        """
        seed = check_random_state(rand_seed).randint(2**31 - 1)
        queried_qs = query_strategy_class(random_state=seed)
        update_only_qs = query_strategy_class(random_state=seed)

        # Sliding training windows, pre-filled with the initial data.
        X_train = deque(X_init, maxlen=training_size)
        y_train = deque(y_init, maxlen=training_size)

        for step, (x_t, y_t) in enumerate(zip(X_stream, y_stream)):
            x_cand = x_t.reshape([1, -1])
            # Utilities are requested on even steps only, so both return
            # forms of ``query`` get exercised.
            want_utilities = step % 2 == 0
            qs_output = call_func(queried_qs.query,
                                  X_cand=x_cand,
                                  clf=clf,
                                  return_utilities=want_utilities)

            if want_utilities:
                queried_indices, utilities = qs_output
            else:
                # Placeholder utilities when the strategy returned none.
                queried_indices, utilities = qs_output, [0.5]

            for strategy in (queried_qs, update_only_qs):
                call_func(strategy.update,
                          X_cand=x_cand,
                          queried_indices=queried_indices,
                          budget_manager_param_dict={"utilities": utilities})

            # Keep the label only if the sample has been queried.
            X_train.append(x_t)
            y_train.append(y_t if len(queried_indices)
                           else clf.missing_label)
            clf.fit(X_train, y_train)
Exemplo n.º 12
0
 def _test_query_param_return_utilities(self, qs_class, clf):
     """Verify that ``query`` rejects a non-boolean ``return_utilities``.

     Passing the string ``'test'`` is expected to raise ``TypeError``.

     Parameters
     ----------
     qs_class : callable
         Query strategy class instantiated through ``call_func``.
     clf : object
         Classifier handed to ``query``.
     """
     strategy = call_func(qs_class, classes=np.unique(self.y_true))
     with self.assertRaises(TypeError):
         call_func(strategy.query,
                   X_cand=self.X,
                   clf=clf,
                   X=self.X,
                   y=self.y,
                   return_utilities='test',
                   ensemble=self.ensemble)
Exemplo n.º 13
0
 def _test_query_param_X_cand(self, qs_class, clf):
     """Verify that ``query`` rejects invalid candidate sets.

     ``None``, an empty list and a one-dimensional array are each expected
     to raise ``ValueError``.

     Parameters
     ----------
     qs_class : callable
         Query strategy class instantiated through ``call_func``.
     clf : object
         Classifier handed to ``query``.
     """
     strategy = call_func(qs_class, classes=np.unique(self.y_true))
     invalid_candidates = (None, [], np.ones(5))
     for candidates in invalid_candidates:
         with self.assertRaises(ValueError):
             call_func(strategy.query,
                       X_cand=candidates,
                       clf=clf,
                       X=self.X,
                       y=self.y,
                       ensemble=self.ensemble)
Exemplo n.º 14
0
 def _test_fit_param_sample_weight(self, clf_class):
     """Verify that ``fit`` rejects ill-shaped ``sample_weight`` values.

     For two samples, a weight vector of length three and a 2x2 weight
     matrix are each expected to raise ``ValueError``.

     Parameters
     ----------
     clf_class : callable
         Classifier class (or factory) instantiated through ``call_func``.
     """
     model = call_func(clf_class,
                       estimator=self.estimator,
                       estimators=self.estimators,
                       missing_label=self.missing_label)
     samples = [[0], [1]]
     labels = [0, 1]
     for bad_weights in ([0, 1, 1], [[1, 1], [1, 1]]):
         with self.assertRaises(ValueError):
             model.fit(X=samples, y=labels, sample_weight=bad_weights)
Exemplo n.º 15
0
 def _test_query_param_batch_size(self, qs_class, clf):
     """Verify that ``query`` validates its ``batch_size`` argument.

     A batch size of 0 is expected to raise ``ValueError`` and a float
     batch size (1.2) is expected to raise ``TypeError``.

     Parameters
     ----------
     qs_class : callable
         Query strategy class instantiated through ``call_func``.
     clf : object
         Classifier handed to ``query``.
     """
     strategy = call_func(qs_class, classes=np.unique(self.y_true))
     rejected = ((0, ValueError), (1.2, TypeError))
     for bad_size, expected_error in rejected:
         with self.assertRaises(expected_error):
             call_func(strategy.query,
                       X_cand=self.X,
                       clf=clf,
                       X=self.X,
                       y=self.y,
                       batch_size=bad_size,
                       ensemble=self.ensemble)
Exemplo n.º 16
0
    def test_budget_managers(self):
        """Check that repeated utility queries leave a budget manager intact.

        For every class in ``self.budget_managers`` two identically
        configured instances process the same stream of 1000 random
        utilities. The first instance is queried once per utility, the
        second three times; the number of queried indices must match.
        Both instances are then updated with the first instance's result.
        """
        rng = np.random.RandomState(0)
        seed = rng.randint(2 ** 31 - 1)
        utilities = rng.rand(1000)

        for bm_class in self.budget_managers.values():
            # Seed the manager only if its constructor supports it.
            init_params = inspect.signature(bm_class.__init__).parameters
            bm_kwargs = {}
            if "random_state" in init_params.keys():
                bm_kwargs["random_state"] = seed
            single_query_bm = bm_class(**bm_kwargs)
            multi_query_bm = bm_class(**bm_kwargs)

            for u in utilities:
                queried_indices = call_func(
                    single_query_bm.query_by_utility,
                    utilities=u.reshape([1, -1]),
                )

                # Query repeatedly without updating in between.
                for _ in range(3):
                    queried_indices2 = call_func(
                        multi_query_bm.query_by_utility,
                        utilities=u.reshape([1, -1]),
                    )
                self.assertEqual(len(queried_indices), len(queried_indices2))

                for manager in (single_query_bm, multi_query_bm):
                    call_func(
                        manager.update,
                        X_cand=u.reshape([1, -1]),
                        queried_indices=queried_indices,
                        utilities=u.reshape([1, -1]),
                    )
Exemplo n.º 17
0
    def test_al_cycle(self):
        """Run every registered query strategy through three scenarios.

        For each name in ``self.query_strategies``:

        * "Random State": two identical ``query`` calls on the same strategy
          instance must return equal indices and utilities.
        * "Batch": a batch query of size 5 must return 5 indices and
          utilities of shape (5, number of candidates); a batch size of 15
          must emit a warning and return 10 indices.
        * "Basic AL Cycle": for initial budgets 5, 1 and 0 the classifier is
          refitted and queried ``self.budget`` times, labelling the queried
          samples after each query.
        """
        for qs_name in self.query_strategies:
            # The "FourDS" strategy is run with ``self.cmm`` instead of the
            # default classifier.
            clf = self.cmm if qs_name == "FourDS" else self.clf
            with self.subTest(msg="Random State", qs_name=qs_name):
                # Start with a completely unlabeled data set.
                y = np.full(self.y_true.shape, self.MISSING_LABEL)
                qs = call_func(self.query_strategies[qs_name],
                               only_mandatory=False,
                               classes=np.unique(self.y_true),
                               random_state=np.random.RandomState(0),
                               clf=clf,
                               ensemble=self.ensemble)

                unlabeled = np.where(is_unlabeled(y))[0]
                # Two identical queries on the same strategy instance.
                id1, u1 = call_func(qs.query,
                                    X_cand=self.X[unlabeled],
                                    X=self.X,
                                    y=y,
                                    clf=clf,
                                    X_eval=self.X,
                                    ensemble=self.ensemble,
                                    return_utilities=True)
                id2, u2 = call_func(qs.query,
                                    X_cand=self.X[unlabeled],
                                    X=self.X,
                                    y=y,
                                    clf=clf,
                                    X_eval=self.X,
                                    ensemble=self.ensemble,
                                    return_utilities=True)
                np.testing.assert_array_equal(id1, id2)
                np.testing.assert_array_equal(u1, u2)

            with self.subTest(msg="Batch", qs_name=qs_name):
                y = np.full(self.y_true.shape, self.MISSING_LABEL)
                qs = call_func(self.query_strategies[qs_name],
                               only_mandatory=True,
                               clf=clf,
                               classes=np.unique(self.y_true),
                               ensemble=self.ensemble,
                               random_state=np.random.RandomState(0))

                # NOTE(review): ``unlabeled`` is not recomputed here; it is
                # the value assigned in the "Random State" sub-test above (or
                # a later assignment from a previous loop iteration if that
                # sub-test failed early) -- confirm this reuse is intended.
                ids, u = call_func(qs.query,
                                   X_cand=self.X[unlabeled],
                                   X=self.X,
                                   y=y,
                                   clf=clf,
                                   X_eval=self.X,
                                   batch_size=5,
                                   ensemble=self.ensemble,
                                   return_utilities=True)
                self.assertEqual(len(ids), 5)
                self.assertEqual(len(u),
                                 5,
                                 msg='utility score should '
                                 'have shape (5xN)')
                self.assertEqual(len(u[0]),
                                 len(unlabeled),
                                 msg='utility score must have shape (5xN)')

                # A batch size of 15 must trigger a warning.
                self.assertWarns(Warning,
                                 call_func,
                                 f_callable=qs.query,
                                 X_cand=self.X[unlabeled],
                                 X=self.X,
                                 y=y,
                                 clf=clf,
                                 X_eval=self.X,
                                 ensemble=self.ensemble,
                                 batch_size=15)

                # With warnings silenced, the same request must return 10
                # indices.
                # NOTE(review): presumably the fixture holds 10 candidates,
                # so the batch is capped at that number -- confirm.
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore")
                    ids = call_func(qs.query,
                                    X_cand=self.X[unlabeled],
                                    X=self.X,
                                    y=y,
                                    X_eval=self.X,
                                    clf=clf,
                                    batch_size=15,
                                    ensemble=self.ensemble)
                    self.assertEqual(len(ids), 10)

            for init_budget in [5, 1, 0]:
                # Reveal the true labels of the first ``init_budget`` samples.
                y = np.full(self.y_true.shape, self.MISSING_LABEL)
                y[0:init_budget] = self.y_true[0:init_budget]

                with self.subTest(msg="Basic AL Cycle",
                                  init_budget=init_budget,
                                  qs_name=qs_name):
                    qs = call_func(self.query_strategies[qs_name],
                                   only_mandatory=True,
                                   clf=clf,
                                   classes=np.unique(self.y_true),
                                   random_state=1,
                                   ensemble=self.ensemble)

                    for b in range(self.budget):
                        # Refit on the current labels, query among the
                        # still unlabeled samples, then label the selection.
                        unlabeled = np.where(is_unlabeled(y))[0]
                        clf.fit(self.X, y)
                        ids = call_func(
                            qs.query,
                            X_cand=self.X[unlabeled],
                            clf=clf,
                            X=self.X,
                            y=y,
                            X_eval=self.X,
                            ensemble=self.ensemble,
                        )
                        # ``ids`` index into the candidate subset; map them
                        # back to positions in the full data set.
                        sample_id = unlabeled[ids]
                        y[sample_id] = self.y_true[sample_id]