Example #1
def _download_cli(cfg, is_cli_call=True):

    tag_name = cfg['tag_name']
    dataset_id = cfg['dataset_id']
    token = cfg['token']

    if not tag_name:
        print('Please specify a tag name')
        print('For help, try: lightly-download --help')
        return

    if not token or not dataset_id:
        print('Please specify your access token and dataset id')
        print('For help, try: lightly-download --help')
        return

    api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)

    # get tag id
    tag_name_id_dict = {tag.name: tag.id
                        for tag in api_workflow_client._get_all_tags()}
    tag_id = tag_name_id_dict.get(tag_name, None)
    if tag_id is None:
        print(f'The specified tag {tag_name} does not exist.')
        return

    # get tag data
    tag_data = api_workflow_client.tags_api.get_tag_by_tag_id(
        dataset_id=dataset_id, tag_id=tag_id)

    # get samples
    chosen_samples_ids = BitMask.from_hex(tag_data.bit_mask_data).to_indices()
    samples = [
        api_workflow_client.filenames_on_server[i] for i in chosen_samples_ids
    ]

    # store sample names in a .txt file
    with open(cfg['tag_name'] + '.txt', 'w') as f:
        for item in samples:
            f.write("%s\n" % item)

    msg = 'The list of files in tag {} is stored at: '.format(cfg['tag_name'])
    msg += os.path.join(os.getcwd(), cfg['tag_name'] + '.txt')
    print(msg, flush=True)

    if not cfg['input_dir'] and cfg['output_dir']:
        # download full images from api
        output_dir = fix_input_path(cfg['output_dir'])
        api_workflow_client.download_dataset(output_dir, tag_name=tag_name)

    elif cfg['input_dir'] and cfg['output_dir']:
        input_dir = fix_input_path(cfg['input_dir'])
        output_dir = fix_input_path(cfg['output_dir'])
        print(f'Copying files from {input_dir} to {output_dir}.')

        # create a dataset from the input directory
        dataset = data.LightlyDataset(input_dir=input_dir)

        # dump the dataset in the output directory
        dataset.dump(output_dir, samples)
Example #2
def t_est_active_learning(api_workflow_client: ApiWorkflowClient,
                          method: SamplingMethod = SamplingMethod.CORAL,
                          query_tag_name: str = 'initial-tag',
                          preselected_tag_name: str = None,
                          n_samples_additional: List[int] = [2, 5]):
    # create the tags with 100 and 10 samples, respectively, if they do not yet exist
    if query_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=100,
                                       name=query_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            pass
    if preselected_tag_name is not None:
        sampler_config = SamplerConfig(method=SamplingMethod.RANDOM,
                                       n_samples=10,
                                       name=preselected_tag_name)
        try:
            api_workflow_client.sampling(sampler_config=sampler_config)
        except RuntimeError:
            pass

    # define the active learning agent
    agent = ActiveLearningAgent(api_workflow_client,
                                query_tag_name=query_tag_name,
                                preselected_tag_name=preselected_tag_name)

    total_no_samples = len(agent.unlabeled_set) + len(agent.labeled_set)

    al_scorer = None

    for iteration, n_additional in enumerate(n_samples_additional):
        n_samples = len(agent.labeled_set) + n_additional
        print(
            f"Beginning with iteration {iteration} to have {n_samples} labeled samples."
        )

        # Perform a sampling
        method_here = SamplingMethod.CORESET if iteration == 0 and method == SamplingMethod.CORAL else method
        sampler_config = SamplerConfig(method=method_here, n_samples=n_samples)
        if al_scorer is None:
            agent.query(sampler_config=sampler_config)
        else:
            agent.query(sampler_config=sampler_config, al_scorer=al_scorer)

        assert len(agent.labeled_set) == n_samples
        assert len(agent.unlabeled_set) == total_no_samples - n_samples

        # Update the scorer with (random) normalized model predictions
        n_samples = len(agent.query_set)
        n_classes = 10
        predictions = np.random.rand(n_samples, n_classes)
        predictions_normalized = predictions / np.sum(predictions,
                                                      axis=1)[:, np.newaxis]
        al_scorer = ScorerClassification(model_output=predictions_normalized)

    print("Success!")
Example #3
def _upload_cli(cfg, is_cli_call=True):
    input_dir = cfg['input_dir']
    if input_dir and is_cli_call:
        input_dir = fix_input_path(input_dir)

    path_to_embeddings = cfg['embeddings']
    if path_to_embeddings and is_cli_call:
        path_to_embeddings = fix_input_path(path_to_embeddings)

    dataset_id = cfg['dataset_id']
    token = cfg['token']
    new_dataset_name = cfg['new_dataset_name']

    cli_api_args_wrong = False
    if not token:
        print_as_warning('Please specify your access token.')
        cli_api_args_wrong = True

    dataset_id_ok = dataset_id and len(dataset_id) > 0
    new_dataset_name_ok = new_dataset_name and len(new_dataset_name) > 0
    if new_dataset_name_ok and not dataset_id_ok:
        api_workflow_client = ApiWorkflowClient(token=token)
        api_workflow_client.create_dataset(dataset_name=new_dataset_name)
    elif dataset_id_ok and not new_dataset_name_ok:
        api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)
    else:
        print_as_warning('Please specify either the dataset_id of an existing dataset or a new_dataset_name.')
        cli_api_args_wrong = True

    if cli_api_args_wrong:
        print_as_warning('For help, try: lightly-upload --help')
        return

    size = cfg['resize']
    if not isinstance(size, int):
        size = tuple(size)
    transform = None
    if isinstance(size, tuple) or size > 0:
        transform = torchvision.transforms.Resize(size)

    if input_dir:
        mode = cfg['upload']
        dataset = LightlyDataset(input_dir=input_dir, transform=transform)
        api_workflow_client.upload_dataset(
            input=dataset, mode=mode, max_workers=cfg['loader']['num_workers']
        )
        print(f"Finished the upload of the dataset.")

    if path_to_embeddings:
        name = cfg['embedding_name']
        print("Starting upload of embeddings.")
        api_workflow_client.upload_embeddings(
            path_to_embeddings_csv=path_to_embeddings, name=name
        )
        print("Finished upload of embeddings.")

    if new_dataset_name_ok:
        print(f'The dataset_id of the newly created dataset is '
              f'{bcolors.OKBLUE}{api_workflow_client.dataset_id}{bcolors.ENDC}')
Example #4
def create_new_dataset_with_embeddings(path_to_dataset: str,
                                       token: str,
                                       dataset_name: str) -> ApiWorkflowClient:
    api_workflow_client = ApiWorkflowClient(token=token)

    # create the dataset
    api_workflow_client.create_new_dataset_with_unique_name(dataset_basename=dataset_name)

    # upload to the dataset
    api_workflow_client.upload_dataset(input=path_to_dataset)

    # calculate and save the embeddings
    path_to_embeddings_csv = f"{path_to_dataset}/embeddings.csv"
    if not os.path.isfile(path_to_embeddings_csv):
        dataset = LightlyDataset(input_dir=path_to_dataset)
        embeddings = np.random.normal(size=(len(dataset.dataset.samples), 32))
        filepaths, labels = zip(*dataset.dataset.samples)
        filenames = [filepath[len(path_to_dataset):].lstrip('/') for filepath in filepaths]
        print("Starting save of embeddings")
        save_embeddings(path_to_embeddings_csv, embeddings, labels, filenames)
        print("Finished save of embeddings")

    # upload the embeddings
    api_workflow_client.upload_embeddings(path_to_embeddings_csv=path_to_embeddings_csv, name="embedding_1")

    return api_workflow_client
Example #5
def _upload_cli(cfg, is_cli_call=True):
    input_dir = cfg['input_dir']
    if input_dir and is_cli_call:
        input_dir = fix_input_path(input_dir)

    path_to_embeddings = cfg['embeddings']
    if path_to_embeddings and is_cli_call:
        path_to_embeddings = fix_input_path(path_to_embeddings)

    dataset_id = cfg['dataset_id']
    token = cfg['token']
    new_dataset_name = cfg['new_dataset_name']

    if not token:
        warnings.warn('Please specify your access token. For help, try: lightly-upload --help')
        return

    dataset_id_ok = dataset_id and len(dataset_id) > 0
    new_dataset_name_ok = new_dataset_name and len(new_dataset_name) > 0
    if new_dataset_name_ok and not dataset_id_ok:
        api_workflow_client = ApiWorkflowClient(token=token)
        api_workflow_client.create_dataset(dataset_name=new_dataset_name)
    elif dataset_id_ok and not new_dataset_name_ok:
        api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)
    else:
        warnings.warn('Please specify either the dataset_id of an existing dataset or a new_dataset_name. '
                      'For help, try: lightly-upload --help')
        return

    size = cfg['resize']
    if not isinstance(size, int):
        size = tuple(size)
    transform = None
    if isinstance(size, tuple) or size > 0:
        transform = torchvision.transforms.Resize(size)

    if input_dir:
        mode = cfg['upload']
        dataset = LightlyDataset(input_dir=input_dir, transform=transform)
        api_workflow_client.upload_dataset(
            input=dataset, mode=mode, max_workers=cfg['loader']['num_workers']
        )

    if path_to_embeddings:
        name = cfg['embedding_name']
        api_workflow_client.upload_embeddings(
            path_to_embeddings_csv=path_to_embeddings, name=name
        )
Example #6
    def __init__(self, *args, **kwargs):
        lightly.api.api_workflow_client.ApiClient = MockedApiClient
        lightly.api.version_checking.VersioningApi = MockedVersioningApi
        ApiWorkflowClient.__init__(self, *args, **kwargs)

        self.samplings_api = MockedSamplingsApi(api_client=self.api_client)
        self.jobs_api = MockedJobsApi(api_client=self.api_client)
        self.tags_api = MockedTagsApi(api_client=self.api_client)
        self.embeddings_api = MockedEmbeddingsApi(api_client=self.api_client)
        self.mappings_api = MockedMappingsApi(api_client=self.api_client)
        self.scores_api = MockedScoresApi(api_client=self.api_client)
        self.samples_api = MockedSamplesApi(api_client=self.api_client)
        self.datasets_api = MockedDatasetsApi(api_client=self.api_client)
        self.quota_api = MockedQuotaApi(api_client=self.api_client)

        lightly.api.api_workflow_client.put_request = mocked_put_request

        self.wait_time_till_next_poll = 0.001  # for api_workflow_sampling
Example #7
def create_new_dataset_with_embeddings(path_to_dataset: str,
                                       token: str,
                                       dataset_name: str) -> ApiWorkflowClient:
    api_workflow_client = ApiWorkflowClient(token=token)

    # create the dataset
    api_workflow_client.create_new_dataset_with_unique_name(dataset_basename=dataset_name)

    # upload to the dataset
    initialize(config_path="../../lightly/cli/config", job_name="test_app")
    cfg = compose(config_name="config", overrides=[
        f"input_dir='{path_to_dataset}'",
        f"token='{token}'",
        f"dataset_id={api_workflow_client.dataset_id}"
        ])
    upload_cli(cfg)

    # calculate and save the embeddings
    path_to_embeddings_csv = f"{path_to_dataset}/embeddings.csv"
    if not os.path.isfile(path_to_embeddings_csv):
        dataset = LightlyDataset(input_dir=path_to_dataset)
        embeddings = np.random.normal(size=(len(dataset.dataset.samples), 32))
        filepaths, labels = zip(*dataset.dataset.samples)
        filenames = [filepath[len(path_to_dataset):].lstrip('/') for filepath in filepaths]
        print("Starting save of embeddings")
        save_embeddings(path_to_embeddings_csv, embeddings, labels, filenames)
        print("Finished save of embeddings")

    # upload the embeddings
    print("Starting upload of embeddings.")
    api_workflow_client.upload_embeddings(path_to_embeddings_csv=path_to_embeddings_csv, name="embedding_1")
    print("Finished upload of embeddings.")

    return api_workflow_client
Example #8
def _download_cli(cfg, is_cli_call=True):

    tag_name = str(cfg['tag_name'])
    dataset_id = str(cfg['dataset_id'])
    token = str(cfg['token'])

    if not tag_name or not token or not dataset_id:
        print_as_warning('Please specify all of the parameters tag_name, token and dataset_id')
        print_as_warning('For help, try: lightly-download --help')
        return

    api_workflow_client = ApiWorkflowClient(
        token=token, dataset_id=dataset_id
    )

    # get tag id
    tag_data = api_workflow_client.get_tag_by_name(tag_name)
    filenames_tag = api_workflow_client.get_filenames_in_tag(
        tag_data,
        exclude_parent_tag=cfg['exclude_parent_tag'],
    )

    # store sample names in a .txt file
    filename = tag_name + '.txt'
    with open(filename, 'w') as f:
        for item in filenames_tag:
            f.write("%s\n" % item)

    filepath = os.path.join(os.getcwd(), filename)
    msg = f'The list of files in tag {cfg["tag_name"]} is stored at: {bcolors.OKBLUE}{filepath}{bcolors.ENDC}'
    print(msg, flush=True)

    if not cfg['input_dir'] and cfg['output_dir']:
        # download full images from api
        output_dir = fix_input_path(cfg['output_dir'])
        api_workflow_client.download_dataset(output_dir, tag_name=tag_name)

    elif cfg['input_dir'] and cfg['output_dir']:
        input_dir = fix_input_path(cfg['input_dir'])
        output_dir = fix_input_path(cfg['output_dir'])
        print(f'Copying files from {input_dir} to {bcolors.OKBLUE}{output_dir}{bcolors.ENDC}.')

        # create a dataset from the input directory
        dataset = data.LightlyDataset(input_dir=input_dir)

        # dump the dataset in the output directory
        dataset.dump(output_dir, filenames_tag)
Example #9
def _upload_cli(cfg, is_cli_call=True):

    input_dir = cfg['input_dir']
    if input_dir and is_cli_call:
        input_dir = fix_input_path(input_dir)

    path_to_embeddings = cfg['embeddings']
    if path_to_embeddings and is_cli_call:
        path_to_embeddings = fix_input_path(path_to_embeddings)

    dataset_id = cfg['dataset_id']
    token = cfg['token']

    size = cfg['resize']
    if not isinstance(size, int):
        size = tuple(size)
    transform = None
    if isinstance(size, tuple) or size > 0:
        transform = torchvision.transforms.Resize(size)

    if not token or not dataset_id:
        print('Please specify your access token and dataset id.')
        print('For help, try: lightly-upload --help')
        return

    api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)

    if input_dir:
        mode = cfg['upload']
        dataset = LightlyDataset(input_dir=input_dir, transform=transform)
        api_workflow_client.upload_dataset(input=dataset, mode=mode)

    if path_to_embeddings:
        name = cfg['embedding_name']
        api_workflow_client.upload_embeddings(
            path_to_embeddings_csv=path_to_embeddings, name=name)
Example #10
def _upload_cli(cfg, is_cli_call=True):
    input_dir = cfg['input_dir']
    if input_dir and is_cli_call:
        input_dir = fix_input_path(input_dir)

    path_to_embeddings = cfg['embeddings']
    if path_to_embeddings and is_cli_call:
        path_to_embeddings = fix_input_path(path_to_embeddings)

    dataset_id = cfg['dataset_id']
    token = cfg['token']
    new_dataset_name = cfg['new_dataset_name']

    cli_api_args_wrong = False
    if not token:
        print_as_warning('Please specify your access token.')
        cli_api_args_wrong = True

    if dataset_id:
        if new_dataset_name:
            print_as_warning(
                'Please specify either the dataset_id of an existing dataset '
                'or a new_dataset_name, but not both.')
            cli_api_args_wrong = True
        else:
            api_workflow_client = \
                ApiWorkflowClient(token=token, dataset_id=dataset_id)
    else:
        if new_dataset_name:
            api_workflow_client = ApiWorkflowClient(token=token)
            api_workflow_client.create_dataset(dataset_name=new_dataset_name)
        else:
            print_as_warning(
                'Please specify either the dataset_id of an existing dataset '
                'or a new_dataset_name.')
            cli_api_args_wrong = True
    # delete the dataset_id as it might be an empty string
    # Use api_workflow_client.dataset_id instead
    del dataset_id

    if cli_api_args_wrong:
        print_as_warning('For help, try: lightly-upload --help')
        return

    # potentially load custom metadata
    custom_metadata = None
    if cfg['custom_metadata']:
        path_to_custom_metadata = fix_input_path(cfg['custom_metadata'])
        print('Loading custom metadata from '
              f'{bcolors.OKBLUE}{path_to_custom_metadata}{bcolors.ENDC}')
        with open(path_to_custom_metadata, 'r') as f:
            custom_metadata = json.load(f)

    # set the number of workers if unset
    if cfg['loader']['num_workers'] < 0:
        # use the number of available CPUs, but at least 8 and at most 32 workers
        num_workers = max(8, cpu_count())
        num_workers = min(32, num_workers)
        cfg['loader']['num_workers'] = num_workers

    size = cfg['resize']
    if not isinstance(size, int):
        size = tuple(size)
    transform = None
    if isinstance(size, tuple) or size > 0:
        transform = torchvision.transforms.Resize(size)

    if input_dir:
        mode = cfg['upload']
        dataset = LightlyDataset(input_dir=input_dir, transform=transform)
        api_workflow_client.upload_dataset(
            input=dataset,
            mode=mode,
            max_workers=cfg['loader']['num_workers'],
            custom_metadata=custom_metadata,
        )
        print('Finished the upload of the dataset.')

    if path_to_embeddings:
        name = cfg['embedding_name']
        print('Starting upload of embeddings.')
        api_workflow_client.upload_embeddings(
            path_to_embeddings_csv=path_to_embeddings, name=name)
        print('Finished upload of embeddings.')

    if custom_metadata is not None and not input_dir:
        # upload custom metadata separately
        api_workflow_client.upload_custom_metadata(
            custom_metadata,
            verbose=True,
            max_workers=cfg['loader']['num_workers'],
        )

    if new_dataset_name:
        print(
            f'The dataset_id of the newly created dataset is '
            f'{bcolors.OKBLUE}{api_workflow_client.dataset_id}{bcolors.ENDC}')

    os.environ[cfg['environment_variable_names']
               ['lightly_last_dataset_id']] = api_workflow_client.dataset_id
Example #11
def _download_cli(cfg, is_cli_call=True):
    tag_name = cfg['tag_name']
    dataset_id = cfg['dataset_id']
    token = cfg['token']

    if not tag_name or not token or not dataset_id:
        print_as_warning(
            'Please specify all of the parameters tag_name, token and dataset_id'
        )
        print_as_warning('For help, try: lightly-download --help')
        return

    api_workflow_client = ApiWorkflowClient(token=token, dataset_id=dataset_id)

    # get tag id
    tag_name_id_dict = {tag.name: tag.id
                        for tag in api_workflow_client._get_all_tags()}
    tag_id = tag_name_id_dict.get(tag_name, None)
    if tag_id is None:
        warnings.warn(f'The specified tag {tag_name} does not exist.')
        return

    # get tag data
    tag_data: TagData = api_workflow_client.tags_api.get_tag_by_tag_id(
        dataset_id=dataset_id, tag_id=tag_id)

    if cfg["exclude_parent_tag"]:
        parent_tag_id = tag_data.prev_tag_id
        tag_arithmetics_request = TagArithmeticsRequest(
            tag_id1=tag_data.id,
            tag_id2=parent_tag_id,
            operation=TagArithmeticsOperation.DIFFERENCE)
        bit_mask_response: TagBitMaskResponse \
            = api_workflow_client.tags_api.perform_tag_arithmetics(body=tag_arithmetics_request, dataset_id=dataset_id)
        bit_mask_data = bit_mask_response.bit_mask_data
    else:
        bit_mask_data = tag_data.bit_mask_data

    # get samples
    chosen_samples_ids = BitMask.from_hex(bit_mask_data).to_indices()
    samples = [
        api_workflow_client.filenames_on_server[i] for i in chosen_samples_ids
    ]

    # store sample names in a .txt file
    filename = cfg['tag_name'] + '.txt'
    with open(filename, 'w') as f:
        for item in samples:
            f.write("%s\n" % item)

    filepath = os.path.join(os.getcwd(), filename)
    msg = f'The list of files in tag {cfg["tag_name"]} is stored at: {bcolors.OKBLUE}{filepath}{bcolors.ENDC}'
    print(msg, flush=True)

    if not cfg['input_dir'] and cfg['output_dir']:
        # download full images from api
        output_dir = fix_input_path(cfg['output_dir'])
        api_workflow_client.download_dataset(output_dir, tag_name=tag_name)

    elif cfg['input_dir'] and cfg['output_dir']:
        input_dir = fix_input_path(cfg['input_dir'])
        output_dir = fix_input_path(cfg['output_dir'])
        print(
            f'Copying files from {input_dir} to {bcolors.OKBLUE}{output_dir}{bcolors.ENDC}.'
        )

        # create a dataset from the input directory
        dataset = data.LightlyDataset(input_dir=input_dir)

        # dump the dataset in the output directory
        dataset.dump(output_dir, samples)
Example #12
        return features_array

    def get_labels(self, filenames: List[str]) -> np.ndarray:
        labels = np.array(
            [self.dataset[filename][1] for filename in filenames])
        return labels


# %%
# First, we read the variables we previously set as environment variables in the console
token = os.getenv("LIGHTLY_TOKEN", default="YOUR_TOKEN")
path_to_embeddings_csv = os.getenv("LIGHTLY_EMBEDDINGS_CSV",
                                   default="path_to_your_embeddings_csv")

# We define the client to the Lightly Platform API
api_workflow_client = ApiWorkflowClient(token=token)
api_workflow_client.create_dataset(
    dataset_name="active_learning_clothing_dataset")

# %%
# We define the dataset, the classifier and the active learning agent
dataset = CSVEmbeddingDataset(path_to_embeddings_csv=path_to_embeddings_csv)
classifier = LogisticRegression(max_iter=1000)
agent = ActiveLearningAgent(api_workflow_client=api_workflow_client)

# %%
# 1. Choose an initial subset of your dataset.
# We want to start with 200 samples and use the CORESET sampler to select them.
print("Starting the initial sampling")
sampler_config = SamplerConfig(n_samples=200,
                               method=SamplingMethod.CORESET)

    def get_features(self, filenames: List[str]) -> np.ndarray:
        features_array = np.array(
            [self.dataset[filename][0] for filename in filenames])
        return features_array

    def get_labels(self, filenames: List[str]) -> np.ndarray:
        labels = np.array(
            [self.dataset[filename][1] for filename in filenames])
        return labels


# %%
# Upload the embeddings to the lightly web platform
api_workflow_client = ApiWorkflowClient(token=YOUR_TOKEN,
                                        dataset_id=YOUR_DATASET_ID)
api_workflow_client.upload_embeddings(
    name="embedding-1", path_to_embeddings_csv=path_to_embeddings_csv)

# %%
# Define the dataset for the classifier, the classifier and the active learning agent
dataset = CSVEmbeddingDataset(path_to_embeddings_csv=path_to_embeddings_csv)
classifier = KNeighborsClassifier(n_neighbors=20, weights='distance')
agent = ActiveLearningAgent(api_workflow_client=api_workflow_client)

# %%
# 1. Choose an initial subset of your dataset.
# We want to start with 100 samples and use the CORESET sampler to select them.
print("Starting the initial sampling")
sampler_config = SamplerConfig(n_samples=100,
                               method=SamplingMethod.CORESET)
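
# %%
# A minimal sketch, assuming the `agent` defined above: the sampler
# configuration is typically passed to the agent's `query` method (as in the
# examples above) so that the Lightly Platform selects the initial subset.
agent.query(sampler_config=sampler_config)
print(f"{len(agent.labeled_set)} samples are now in the labeled set.")
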
# ----------------------------
#
# In active learning, we want to pick the new data for which our model struggles
# the most. If we have an image with a single car in it and our model is
# highly confident that there is a car, we don't gain much by including
# this example in our training data. However, if we focus on images where the
# model is not sure whether the object is a car or a building, we want
# to include these images to refine the decision boundary.
#
# First, we need to create an active learning agent in order to
# provide lightly with the model predictions.
# We can use the ApiWorkflowClient for this. Make sure to use the
# correct dataset_id and token.

# create Lightly API client
api_client = ApiWorkflowClient(dataset_id=YOUR_DATASET_ID, token=YOUR_TOKEN)
al_agent = ActiveLearningAgent(api_client)
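
# %%
# A minimal sketch of how model predictions would later be handed to the
# agent (hypothetical: assumes numpy is imported as np, the lightly
# active-learning classes from the earlier examples are imported, and uses
# random softmax predictions with 10 classes, one row per sample in
# `al_agent.query_set`). The predictions are wrapped in a classification
# scorer and passed to the agent together with a sampler configuration.
predictions = np.random.rand(len(al_agent.query_set), 10)
predictions = predictions / predictions.sum(axis=1, keepdims=True)
al_scorer = ScorerClassification(model_output=predictions)
sampler_config = SamplerConfig(method=SamplingMethod.CORAL, n_samples=100)
al_agent.query(sampler_config=sampler_config, al_scorer=al_scorer)
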

# %%

# we can access the images of the dataset we want to use for active learning using
# the `al_agent.query_set` property

# let's print the first 3 entries
print(al_agent.query_set[:3])

# %%
# Note that our active learning agent has already synchronized with the Lightly
# Platform and knows the filenames present in our dataset.
#
# Let's verify the length of the `query_set`. The `query_set` is the set of
# samples from which new samples can be selected.
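# A minimal sketch (assuming the `al_agent` defined above): the length of the
# query set can be read directly from the agent.
print(len(al_agent.query_set))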