    def test_agent(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent_0 = ActiveLearningAgent(self.api_workflow_client)
        agent_1 = ActiveLearningAgent(self.api_workflow_client, query_tag_name="query_tag_name_xyz")
        agent_2 = ActiveLearningAgent(self.api_workflow_client, query_tag_name="query_tag_name_xyz",
                                      preselected_tag_name="preselected_tag_name_xyz")
        agent_3 = ActiveLearningAgent(self.api_workflow_client, preselected_tag_name="preselected_tag_name_xyz")

        for method in [SamplingMethod.CORAL, SamplingMethod.CORESET, SamplingMethod.RANDOM]:
            for agent in [agent_0, agent_1, agent_2, agent_3]:
                for batch_size in [2, 6]:
                    n_old_labeled = len(agent.labeled_set)
                    n_old_unlabeled = len(agent.unlabeled_set)

                    n_samples = len(agent.labeled_set) + batch_size
                    # CORAL cannot be used while the labeled set is still empty,
                    # so fall back to CORESET for the first selection
                    if method == SamplingMethod.CORAL and len(agent.labeled_set) == 0:
                        sampler_config = SamplerConfig(n_samples=n_samples, method=SamplingMethod.CORESET)
                    else:
                        sampler_config = SamplerConfig(n_samples=n_samples, method=method)

                    if sampler_config.method == SamplingMethod.CORAL:
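                        # CORAL needs active learning scores, so build a classification
                        # scorer from random, row-normalized mock predictions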
                        predictions = np.random.rand(len(agent.unlabeled_set), 10).astype(np.float32)
                        predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
                        al_scorer = ScorerClassification(predictions_normalized)
                        labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
                    else:
                        labeled_set, added_set = agent.query(sampler_config=sampler_config)

                    self.assertEqual(n_old_labeled + len(added_set), len(labeled_set))
                    self.assertTrue(set(added_set).issubset(labeled_set))
                    self.assertEqual(len(list(set(agent.labeled_set) & set(agent.unlabeled_set))), 0)
                    self.assertEqual(n_old_unlabeled - len(added_set), len(agent.unlabeled_set))
Example #2
    def test_agent_query_too_few(self):

        self.api_workflow_client.embedding_id = "embedding_id_xyz"
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz",
        )

        # query for 0 samples, i.e. fewer than are already in the preselected tag
        sampler_config = SamplerConfig(n_samples=0,
                                       method=SamplingMethod.RANDOM)

        agent.query(sampler_config)
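        # querying for too few samples should simply return without raising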
Example #3
    def test_agent_only_upload_scores(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz",
        )

        n_predictions = len(agent.query_set)
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

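        # only upload the scores to the platform; no query/sampling is run in this test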
        agent.upload_scores(al_scorer)
    def test_agent_wrong_scores(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent = ActiveLearningAgent(self.api_workflow_client, preselected_tag_name="preselected_tag_name_xyz")
        method = SamplingMethod.CORAL
        n_samples = len(agent.labeled_set) + 2

        n_predictions = len(agent.unlabeled_set) - 3  # the -3 should cause an error
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions, axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

        sampler_config = SamplerConfig(n_samples=n_samples, method=method)
        with self.assertRaises(ValueError):
            labeled_set, added_set = agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
Example #5
    def test_agent_without_embedding_id(self):
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")
        method = SamplingMethod.CORAL
        n_samples = len(agent.labeled_set) + 2

        n_predictions = len(agent.query_set)
        predictions = np.random.rand(n_predictions, 10).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(predictions_normalized)

        sampler_config = SamplerConfig(n_samples=n_samples, method=method)
        agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
    def test_agent(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"

        agent_0 = ActiveLearningAgent(self.api_workflow_client)
        agent_1 = ActiveLearningAgent(self.api_workflow_client,
                                      query_tag_name="query_tag_name_xyz")
        agent_2 = ActiveLearningAgent(
            self.api_workflow_client,
            query_tag_name="query_tag_name_xyz",
            preselected_tag_name="preselected_tag_name_xyz")
        agent_3 = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")

        for method in [
                SamplingMethod.CORAL, SamplingMethod.CORESET,
                SamplingMethod.RANDOM
        ]:
            for agent in [agent_0, agent_1, agent_2, agent_3]:
                for batch_size in [2, 6]:
                    n_samples = len(agent.labeled_set) + batch_size
                    # CORAL cannot be used while the labeled set is still empty,
                    # so fall back to CORESET for the first selection
                    if method == SamplingMethod.CORAL and len(
                            agent.labeled_set) == 0:
                        sampler_config = SamplerConfig(
                            n_samples=n_samples, method=SamplingMethod.CORESET)
                    else:
                        sampler_config = SamplerConfig(n_samples=n_samples,
                                                       method=method)

                    if sampler_config.method == SamplingMethod.CORAL:
                        predictions = np.random.rand(len(agent.unlabeled_set),
                                                     10)
                        predictions_normalized = predictions / np.sum(
                            predictions, axis=1)[:, np.newaxis]
                        al_scorer = ScorerClassification(
                            predictions_normalized)
                        chosen_filenames = agent.query(
                            sampler_config=sampler_config, al_scorer=al_scorer)
                    else:
                        chosen_filenames = agent.query(
                            sampler_config=sampler_config)
Example #7
    def test_agent_added_set_before_query(self):

        self.api_workflow_client.embedding_id = "embedding_id_xyz"
        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")

        # these properties can be accessed before any query
        agent.query_set
        agent.labeled_set
        agent.unlabeled_set
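        # added_set is only defined after a query, so accessing it now must raise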
        with self.assertRaises(RuntimeError):
            agent.added_set
Example #8
    def test_agent_with_generator(self):
        self.api_workflow_client.embedding_id = "embedding_id_xyz"
        width = 32
        height = 32
        no_classes = 13

        agent = ActiveLearningAgent(
            self.api_workflow_client,
            preselected_tag_name="preselected_tag_name_xyz")
        method = SamplingMethod.CORAL
        n_samples = len(agent.labeled_set) + 2

        n_predictions = len(agent.query_set)
        predictions = np.random.rand(n_predictions, no_classes, width,
                                     height).astype(np.float32)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        predictions_generator = (predictions_normalized[i]
                                 for i in range(n_predictions))
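        # the semantic segmentation scorer consumes a generator of per-image
        # predictions, so it can only be used once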
        al_scorer = ScorerSemanticSegmentation(predictions_generator)

        sampler_config = SamplerConfig(n_samples=n_samples, method=method)
        agent.query(sampler_config=sampler_config, al_scorer=al_scorer)

        # make sure we throw an error if generator is already consumed
        with self.assertRaises(ValueError):
            agent.upload_scores(al_scorer)
Example #9
def t_est_active_learning(api_workflow_client: ApiWorkflowClient,
                          method: SamplingMethod = SamplingMethod.CORAL,
                          query_tag_name: str = 'initial-tag',
                          preselected_tag_name: str = None,
                          n_samples_additional: List[int] = [2, 5]):
    # create the tags with 100 and 10 samples, respectively, if they do not exist yet
    if query_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=100,
                                       name=query_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            # the tag already exists
            pass
    if preselected_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=10,
                                       name=preselected_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            # the tag already exists
            pass

    # define the active learning agent
    agent = ActiveLearningAgent(api_workflow_client,
                                query_tag_name=query_tag_name,
                                preselected_tag_name=preselected_tag_name)

    total_no_samples = len(agent.unlabeled_set) + len(agent.labeled_set)

    al_scorer = None

    for iteration, n_additional in enumerate(n_samples_additional):
        n_samples = len(agent.labeled_set) + n_additional
        print(
            f"Beginning with iteration {iteration} to have {n_samples} labeled samples."
        )

        # Perform a sampling; on the first iteration there is no scorer yet,
        # so CORAL falls back to CORESET
        method_here = SamplingMethod.CORESET if iteration == 0 and method == SamplingMethod.CORAL else method
        sampler_config = SamplerConfig(method=method_here, n_samples=n_samples)
        if al_scorer is None:
            agent.query(sampler_config=sampler_config)
        else:
            agent.query(sampler_config=sampler_config, al_scorer=al_scorer)

        assert len(agent.labeled_set) == n_samples
        assert len(agent.unlabeled_set) == total_no_samples - n_samples

        # Update the scorer
        n_predictions = len(agent.query_set)
        n_classes = 10
        predictions = np.random.rand(n_predictions, n_classes)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(model_output=predictions_normalized)

    print("Success!")
Example #10
# %%
# First, we read the variables that we previously set as environment variables in the console
token = os.getenv("LIGHTLY_TOKEN", default="YOUR_TOKEN")
path_to_embeddings_csv = os.getenv("LIGHTLY_EMBEDDINGS_CSV",
                                   default="path_to_your_embeddings_csv")

# We create the client for the Lightly Platform API
api_workflow_client = ApiWorkflowClient(token=token)
api_workflow_client.create_dataset(
    dataset_name="active_learning_clothing_dataset")

# %%
# We define the dataset, the classifier and the active learning agent
dataset = CSVEmbeddingDataset(path_to_embeddings_csv=path_to_embeddings_csv)
classifier = LogisticRegression(max_iter=1000)
agent = ActiveLearningAgent(api_workflow_client=api_workflow_client)

# %%
# 1. Choose an initial subset of your dataset.
# We want to start with 200 samples and use the CORESET sampler for sampling them.
print("Starting the initial sampling")
sampler_config = SamplerConfig(n_samples=200,
                               method=SamplingMethod.CORESET,
                               name='initial-selection')
agent.query(sampler_config=sampler_config)
print(f"There are {len(agent.labeled_set)} samples in the labeled set.")

# %%
# 2. Train a classifier on the labeled set.
labeled_set_features = dataset.get_features(agent.labeled_set)
labeled_set_labels = dataset.get_labels(agent.labeled_set)