def test_score_calculation_specific(self):
        model_output = [[0.7, 0.2, 0.1], [0.4, 0.5, 0.1]]
        model_output = np.array(model_output)
        scorer = ScorerClassification(model_output)
        scores = scorer.calculate_scores()

        self.assertListEqual(list(scores["uncertainty_least_confidence"]),
                             [(1 - 0.7) / (1 - 1. / 3.),
                              (1 - 0.5) / (1 - 1. / 3.)])
        self.assertListEqual(list(scores["uncertainty_margin"]),
                             [1 - (0.7 - 0.2), 1 - (0.5 - 0.4)])
        for val1, val2 in zip(scores["uncertainty_entropy"],
                              _entropy(model_output) / np.log2(3)):
            self.assertAlmostEqual(val1, val2, places=8)
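    # `_entropy` is a helper defined elsewhere in the test module; a minimal
    # sketch of what it presumably computes (base-2 entropy per row), named
    # `_entropy_sketch` here to make clear it is an assumption, not the
    # library's implementation:
    @staticmethod
    def _entropy_sketch(probs: np.ndarray) -> np.ndarray:
        # clip to avoid log2(0); each row is assumed to sum to 1
        probs = np.clip(probs, 1e-12, 1.0)
        return -np.sum(probs * np.log2(probs), axis=1)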
    def test_scorer_classification_variable_model_output_tensor_order(self):

        for tensor_order in range(1, 5):
            # np.ndarray allocates without initializing; only the shape
            # handling is under test here
            model_output = np.ndarray((3, ) * tensor_order)
            with self.subTest(
                    msg=f"model_output.shape = {model_output.shape}"):
                # only a 2D model output of shape (n_samples, n_classes) is valid
                if tensor_order == 2:
                    scorer = ScorerClassification(model_output=model_output)
                    scores = scorer.calculate_scores()
                    for score_values in scores.values():
                        self.assertEqual(type(score_values), np.ndarray)
                else:
                    with self.assertRaises(ValueError):
                        scorer = ScorerClassification(
                            model_output=model_output)
    def test_score_calculation_binary(self):
        model_output = [[0.7], [0.4]]
        model_output = np.array(model_output)
        scorer = ScorerClassification(model_output)
        scores = scorer.calculate_scores()

        self.assertListEqual(list(scores["uncertainty_least_confidence"]),
                             [(1 - 0.7) / (1 - 1. / 2.),
                              (1 - 0.6) / (1 - 1. / 2.)])
        self.assertListEqual(list(scores["uncertainty_margin"]),
                             [1 - (0.7 - 0.3), 1 - (0.6 - 0.4)])
        # expand the binary output p to the two-class distribution [p, 1 - p]
        model_output = np.concatenate([model_output, 1 - model_output], axis=1)
        for val1, val2 in zip(scores["uncertainty_entropy"],
                              _entropy(model_output) / np.log2(2)):
            self.assertAlmostEqual(val1, val2, places=8)
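        # sanity check for the first sample: with [0.7] expanded to [0.7, 0.3],
        # the normalized entropy is
        # -(0.7 * log2(0.7) + 0.3 * log2(0.3)) / log2(2) ≈ 0.8813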
    def test_agent(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent_0 = ActiveLearningAgent(self.api_workflow_client)
        agent_1 = ActiveLearningAgent(self.api_workflow_client, query_tag_name="query_tag_name_xyz")
        agent_2 = ActiveLearningAgent(self.api_workflow_client, query_tag_name="query_tag_name_xyz",
                                      preselected_tag_name="preselected_tag_name_xyz")
        agent_3 = ActiveLearningAgent(self.api_workflow_client, preselected_tag_name="preselected_tag_name_xyz")

        for method in [SamplingMethod.CORAL, SamplingMethod.CORESET, SamplingMethod.RANDOM]:
            for agent in [agent_0, agent_1, agent_2, agent_3]:
                for batch_size in [2, 6]:
                    n_old_labeled = len(agent.labeled_set)
                    n_old_unlabeled = len(agent.unlabeled_set)

                    n_samples = len(agent.labeled_set) + batch_size
                    if method == SamplingMethod.CORAL and len(agent.labeled_set) == 0:
                        sampler_config = SamplerConfig(n_samples=n_samples, method=SamplingMethod.CORESET)
                    else:
                        sampler_config = SamplerConfig(n_samples=n_samples, method=method)

                    if sampler_config.method == SamplingMethod.CORAL:
                        predictions = np.random.rand(len(agent.unlabeled_set), 10).astype(np.float32)
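                        # normalize each row so the predictions form a probability distribution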
                        predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
                        al_scorer = ScorerClassification(predictions_normalized)
                        labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
                    else:
                        labeled_set, added_set = agent.query(sampler_config=sampler_config)

                    self.assertEqual(n_old_labeled + len(added_set), len(labeled_set))
                    assert set(added_set).issubset(labeled_set)
                    self.assertEqual(len(list(set(agent.labeled_set) & set(agent.unlabeled_set))), 0)
                    self.assertEqual(n_old_unlabeled - len(added_set), len(agent.unlabeled_set))
Example #5
def t_est_active_learning(api_workflow_client: ApiWorkflowClient,
                          method: SamplingMethod = SamplingMethod.CORAL,
                          query_tag_name: str = 'initial-tag',
                          preselected_tag_name: str = None,
                          n_samples_additional: List[int] = [2, 5]):
    # create the tags with 100 and 10 samples, respectively, if they don't exist yet
    if query_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=100,
                                       name=query_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            pass
    if preselected_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=10,
                                       name=preselected_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            pass

    # define the active learning agent
    agent = ActiveLearningAgent(api_workflow_client,
                                query_tag_name=query_tag_name,
                                preselected_tag_name=preselected_tag_name)

    total_no_samples = len(agent.unlabeled_set) + len(agent.labeled_set)

    al_scorer = None

    for iteration, n_additional in enumerate(n_samples_additional):
        n_samples = len(agent.labeled_set) + n_additional
        print(
            f"Beginning with iteration {iteration} to have {n_samples} labeled samples."
        )

        # Perform a sampling
        method_here = SamplingMethod.CORESET if iteration == 0 and method == SamplingMethod.CORAL else method
        sampler_config = SamplerConfig(method=method_here, n_samples=n_samples)
        if al_scorer is None:
            agent.query(sampler_config=sampler_config)
        else:
            agent.query(sampler_config=sampler_config, al_scorer=al_scorer)

        assert len(agent.labeled_set) == n_samples
        assert len(agent.unlabeled_set) == total_no_samples - n_samples

        # Update the scorer with fresh (mock) predictions over the query set
        n_predictions = len(agent.query_set)
        n_classes = 10
        predictions = np.random.rand(n_predictions, n_classes)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(model_output=predictions_normalized)

    print("Success!")
Example #6
    def test_score_calculation(self):
        n_samples = 100
        n_classes = 10
        predictions = np.random.rand(n_samples, n_classes)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        model_output = predictions_normalized
        scorer = ScorerClassification(model_output)
        scores = scorer.calculate_scores()
        scores_prediction_entropy = scores["prediction-entropy"]
        scores_prediction_margin = scores["prediction-margin"]

        assert scores_prediction_entropy.shape == (n_samples, )
        assert scores_prediction_margin.shape == (n_samples, )
        assert all(scores_prediction_entropy >= 0)
        assert all(scores_prediction_margin >= 0)
        assert all(scores_prediction_margin <= 1)
    def test_score_calculation_random(self):
        n_samples = 10000
        n_classes = 10
        np.random.seed(42)
        predictions = np.random.rand(n_samples, n_classes)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        model_output = predictions_normalized
        scorer = ScorerClassification(model_output)
        scores = scorer.calculate_scores()

        self.assertEqual(set(scores.keys()),
                         set(ScorerClassification.score_names()))

        for score_name, score in scores.items():
            self.assertEqual(score.shape, (n_samples, ))
            self.assertTrue(all(score >= 0))
            self.assertTrue(all(score <= 1))
            self.assertEqual(type(score), np.ndarray)
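        # score_names() presumably returns the same keys seen in the other
        # tests (e.g. "uncertainty_least_confidence", "uncertainty_margin",
        # "uncertainty_entropy"); the exact names depend on the lightly version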
    def test_scorer_classification_variable_model_output_dimension(self):

        for num_samples in range(5):
            for num_classes in range(5):

                with self.subTest(
                        msg=
                        f"model_output.shape = ({num_samples},{num_classes})"):
                    if num_classes > 0:
                        # a uniform distribution over the classes
                        preds = [1. / num_classes] * num_classes
                    else:
                        preds = []
                    model_output = [preds] * num_samples

                    if num_classes == 0 and num_samples > 0:
                        with self.assertRaises(ValueError):
                            scorer = ScorerClassification(
                                model_output=model_output)
                    else:
                        scorer = ScorerClassification(
                            model_output=model_output)
                        scores = scorer.calculate_scores()
                        self.assertEqual(
                            set(scores.keys()),
                            set(ScorerClassification.score_names()))
                        for score_values in scores.values():
                            self.assertEqual(len(score_values),
                                             len(model_output))
                            self.assertEqual(type(score_values), np.ndarray)
Example #9
    def test_agent_only_upload_scores(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz",
        )

        n_predictions = len(agent.query_set)
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

        agent.upload_scores(al_scorer)
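        # unlike query(), upload_scores only pushes the active learning scores
        # for the query set to the API; it presumably does not trigger a
        # sampling run (an assumption based on the test name)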
    def test_agent_wrong_scores(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent = ActiveLearningAgent(self.api_workflow_client, preselected_tag_name="preselected_tag_name_xyz")
        method = SamplingMethod.CORAL
        n_samples = len(agent.labeled_set) + 2

        n_predictions = len(agent.unlabeled_set) - 3  # the -3 should cause an error
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

        sampler_config = SamplerConfig(n_samples=n_samples, method=method)
        with self.assertRaises(ValueError):
            labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
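        # the ValueError is expected because the scorer holds fewer predictions
        # than there are samples the agent needs scores for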
Example #11
    def test_agent_without_embedding_id(self):
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")
        method = SamplingMethod.CORAL
        n_samples = len(agent.labeled_set) + 2

        n_predictions = len(agent.query_set)
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

        sampler_config = SamplerConfig(n_samples=n_samples, method=method)
        agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
Example #12
    def test_agent(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent_0 = ActiveLearningAgent(self.api_workflow_client)
        agent_1 = ActiveLearningAgent(self.api_workflow_client,
                                      query_tag_name="query_tag_name_xyz")
        agent_2 = ActiveLearningAgent(
            self.api_workflow_client,
            query_tag_name="query_tag_name_xyz",
            preselected_tag_name="preselected_tag_name_xyz")
        agent_3 = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")

        for method in [
                SamplingMethod.CORAL, SamplingMethod.CORESET,
                SamplingMethod.RANDOM
        ]:
            for agent in [agent_0, agent_1, agent_2, agent_3]:
                for batch_size in [2, 6]:
                    n_samples = len(agent.labeled_set) + batch_size
                    if method == SamplingMethod.CORAL and len(
                            agent.labeled_set) == 0:
                        sampler_config = SamplerConfig(
                            n_samples=n_samples, method=SamplingMethod.CORESET)
                    else:
                        sampler_config = SamplerConfig(n_samples=n_samples,
                                                       method=method)

                    if sampler_config.method == SamplingMethod.CORAL:
                        predictions = np.random.rand(len(agent.unlabeled_set),
                                                     10)
                        predictions_normalized = predictions / np.sum(
                            predictions, axis=1)[:, np.newaxis]
                        al_scorer = ScorerClassification(
                            predictions_normalized)
                        chosen_filenames = agent.query(
                            sampler_config=sampler_config, al_scorer=al_scorer)
                    else:
                        chosen_filenames = agent.query(
                            sampler_config=sampler_config)
    def test_scorer_classification_empty_model_output(self):
        scorer = ScorerClassification(model_output=[])
        scores = scorer.calculate_scores()
        self.assertEqual(set(scores.keys()),
                         set(ScorerClassification.score_names()))
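        # each score should itself be empty (one entry per sample); a hedged
        # extra check, assuming array-like scores as in the other tests
        for score_values in scores.values():
            self.assertEqual(len(score_values), 0)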
Example #14
print(f"There are {len(agent.labeled_set)} samples in the labeled set.")

# %%
# 2. Train a classifier on the labeled set.
labeled_set_features = dataset.get_features(agent.labeled_set)
labeled_set_labels = dataset.get_labels(agent.labeled_set)
classifier.fit(X=labeled_set_features, y=labeled_set_labels)

# %%
# 3. Use the classifier to predict on the query set.
query_set_features = dataset.get_features(agent.query_set)
predictions = classifier.predict_proba(X=query_set_features)

# %%
# 4. Calculate active learning scores from the prediction.
active_learning_scorer = ScorerClassification(model_output=predictions)

# %%
# 5. Use an active learning agent to choose the next samples to be labeled based on the active learning scores.
# We request another 100 samples, to reach 300 in total, and use the active learning sampler CORAL for it.
sampler_config = SamplerConfig(n_samples=300,
                               method=SamplingMethod.CORAL,
                               name='al-iteration-1')
agent.query(sampler_config=sampler_config, al_scorer=active_learning_scorer)
print(f"There are {len(agent.labeled_set)} samples in the labeled set.")

# %%
# 6. Update the labeled set to include the newly chosen samples and remove them from the unlabeled set.
# This is already done internally inside the ActiveLearningAgent - no work for you :)

# %%
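# 7. Later iterations repeat steps 2-5 with the grown labeled set; a sketch,
# assuming the same objects as above and a hypothetical next tag name:
labeled_set_features = dataset.get_features(agent.labeled_set)
labeled_set_labels = dataset.get_labels(agent.labeled_set)
classifier.fit(X=labeled_set_features, y=labeled_set_labels)

predictions = classifier.predict_proba(X=dataset.get_features(agent.query_set))
active_learning_scorer = ScorerClassification(model_output=predictions)

sampler_config = SamplerConfig(n_samples=400,
                               method=SamplingMethod.CORAL,
                               name='al-iteration-2')
agent.query(sampler_config=sampler_config, al_scorer=active_learning_scorer)
print(f"There are {len(agent.labeled_set)} samples in the labeled set.")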