def __init__(
    self,
    root: Union[str, Path] = None,
    *,
    train=True,
    transform=None,
    target_transform=None,
    loader=default_loader,
    download=True
):
    """
    Set up the dataset, downloading it first when requested.

    :param root: directory where the dataset is stored or will be
        downloaded. When None, the default location for 'CUB_200_2011'
        is used.
    :param train: whether to use the train or the test subset of the
        original dataset. Defaults to True.
    :param transform: optional transformation applied to the input data.
        Defaults to None.
    :param target_transform: optional transformation applied to the
        targets. Defaults to None.
    :param loader: callable used to load the data from disk. Defaults to
        the torchvision default_loader.
    :param download: defaults to True. If the data is already downloaded
        the download is skipped.
    """
    dataset_root = (
        default_dataset_location("CUB_200_2011") if root is None else root
    )

    # Assigned before the initialisation steps below, which may depend on
    # the selected subset.
    self.train = train

    DownloadableDataset.__init__(
        self, dataset_root, download=download, verbose=True
    )
    self._load_dataset()

    images_dir = os.path.join(dataset_root, CUB200.images_folder)
    PathsDataset.__init__(
        self,
        images_dir,
        self._images,
        transform=transform,
        target_transform=target_transform,
        loader=loader,
    )
def create_generic_benchmark_from_paths(
    train_lists_of_files: Sequence[Sequence[FileAndLabel]],
    test_lists_of_files: Union[
        Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]]
    ],
    *,
    other_streams_lists_of_files: Dict[
        str, Sequence[Sequence[FileAndLabel]]
    ] = None,
    task_labels: Sequence[int],
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    other_streams_transforms: Dict[str, Tuple[Any, Any]] = None,
    dataset_type: AvalancheDatasetType = AvalancheDatasetType.UNDEFINED
) -> GenericCLScenario:
    """
    Creates a benchmark instance given a sequence of lists of files. A
    separate dataset will be created for each list. Each of those datasets
    will be considered a separate experience.

    This is very similar to :func:`create_generic_benchmark_from_filelists`,
    with the main difference being that
    :func:`create_generic_benchmark_from_filelists` accepts, for each
    experience, a file list formatted in Caffe-style. On the contrary, this
    accepts a list of tuples where each tuple contains two elements: the full
    path to the pattern and its label. Optionally, the tuple may contain a
    third element describing the bounding box of the element to crop. This
    last bounding box may be useful when trying to extract the part of the
    image depicting the desired element.

    Apart from that, the same limitations of
    :func:`create_generic_benchmark_from_filelists` regarding task labels
    apply.

    The label of each pattern doesn't have to be an int. Also, a dataset type
    can be defined.

    :param train_lists_of_files: A list of lists. Each list describes the
        paths and labels of patterns to include in that training experience,
        as tuples. Each tuple must contain two elements: the full path to the
        pattern and its class label. Optionally, the tuple may contain a
        third element describing the bounding box to use for cropping (top,
        left, height, width).
    :param test_lists_of_files: A list of lists. Each list describes the
        paths and labels of patterns to include in that test experience, as
        tuples. Each tuple must contain two elements: the full path to the
        pattern and its class label. Optionally, the tuple may contain a
        third element describing the bounding box to use for cropping (top,
        left, height, width). A single flat list (or tuple) of such tuples
        is also accepted and is treated as one test experience.
    :param other_streams_lists_of_files: A dictionary describing the content
        of custom streams. Keys must be valid stream names (letters and
        numbers, not starting with a number) while the values follow the same
        structure of the `train_lists_of_files` and `test_lists_of_files`
        parameters. If this dictionary contains the definition for "train"
        or "test" streams then those definitions will override the
        `train_lists_of_files` and `test_lists_of_files` parameters.
    :param task_labels: A list of task labels. Must contain at least a value
        for each experience. Each value describes the task label that will be
        applied to all patterns of a certain experience. For more info on
        that, see the function description.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark. This means that the
        ``test_lists_of_files`` parameter must define a single experience
        (the complete test set). Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams. If no transformations are defined for a custom stream,
        then "train" transformations will be used. This parameter must be a
        dictionary mapping stream names to transformations. The
        transformations must be a two elements tuple where the first element
        defines the X transformation while the second element is the Y
        transformation. Those elements can be None. If this dictionary
        contains the transformations for "train" or "test" streams then those
        transformations will override the `train_transform`,
        `train_target_transform`, `eval_transform` and
        `eval_target_transform` parameters.
    :param dataset_type: The type of the dataset. Defaults to UNDEFINED.

    :returns: A :class:`GenericCLScenario` instance.
    """
    # Local import so the block does not rely on a file-level `import os`.
    from os import PathLike

    # The signature allows the test set to be passed as one flat list of
    # (path, label[, bbox]) tuples. Without this normalisation each tuple
    # would be iterated as if it were a whole experience. A flat list is
    # recognised by its first entry starting with a path rather than with
    # another tuple; nested input is left untouched.
    if isinstance(test_lists_of_files, (list, tuple)) and test_lists_of_files:
        first_entry = test_lists_of_files[0]
        if (
            isinstance(first_entry, (list, tuple))
            and first_entry
            and isinstance(first_entry[0], (str, PathLike))
        ):
            test_lists_of_files = [test_lists_of_files]

    input_streams = dict(train=train_lists_of_files, test=test_lists_of_files)

    # Custom stream definitions are merged last, so "train"/"test" entries
    # in `other_streams_lists_of_files` replace the positional ones.
    if other_streams_lists_of_files is not None:
        input_streams = {**input_streams, **other_streams_lists_of_files}

    stream_definitions = dict()

    for stream_name, lists_of_files in input_streams.items():
        stream_datasets = []
        for exp_id, list_of_files in enumerate(lists_of_files):
            common_root, exp_paths_list = common_paths_root(list_of_files)
            paths_dataset = PathsDataset(common_root, exp_paths_list)
            # The same `task_labels` sequence is indexed by experience id
            # for every stream.
            stream_datasets.append(
                AvalancheDataset(
                    paths_dataset, task_labels=task_labels[exp_id]
                )
            )

        stream_definitions[stream_name] = stream_datasets

    # All streams (train and test included) are handed over through
    # `other_streams_datasets`; the positional train/test lists stay empty.
    return create_multi_dataset_generic_benchmark(
        [],
        [],
        other_streams_datasets=stream_definitions,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        other_streams_transforms=other_streams_transforms,
        dataset_type=dataset_type,
    )
def CTrL(
    stream_name: str,
    save_to_disk: bool = False,
    path: Path = default_dataset_location(""),
    seed: int = None,
    n_tasks: int = None,
):
    """
    Gives access to the Continual Transfer Learning benchmark streams
    introduced in https://arxiv.org/abs/2012.12631.

    :param stream_name: Name of the test stream to generate. Must be one of
        `s_plus`, `s_minus`, `s_in`, `s_out`, `s_pl` and `s_long`.
    :param save_to_disk: Whether to save each stream on the disk or load
        everything in memory. Setting it to `True` will save memory but takes
        more time on the first generation using the corresponding seed.
    :param path: The path under which the generated stream will be saved if
        save_to_disk is True.
    :param seed: The seed to use to generate the streams. If None, a random
        seed is drawn. Any explicit value, including 0, is used as given so
        that the generated stream can be reproduced.
    :param n_tasks: The number of tasks to generate. This parameter is only
        relevant for the `s_long` stream, as all other streams have a fixed
        number of tasks. Defaults to 100 for `s_long` when not given.
    :return: A scenario containing 3 streams: train, val and test.
    :raises ValueError: If `n_tasks` is given for a stream other than
        `s_long`.
    """
    # Compare against None explicitly: `seed or ...` would discard a
    # legitimate seed of 0 and replace it with a random one.
    if seed is None:
        seed = random.randint(0, sys.maxsize)

    if stream_name != "s_long" and n_tasks is not None:
        raise ValueError(
            "The n_tasks parameter can only be used with the "
            f'"s_long" stream, asked {n_tasks} for {stream_name}'
        )
    elif stream_name == "s_long" and n_tasks is None:
        n_tasks = 100

    stream = ctrl.get_stream(stream_name, seed)

    if save_to_disk:
        # Samples are stored per stream and per seed.
        folder = path / "ctrl" / stream_name / f"seed_{seed}"

    # Train, val and test experiences
    exps = [[], [], []]
    for t_id, t in enumerate(tqdm(stream, desc=f"Loading {stream_name}")):
        # Normalization built from the statistics attached to each task.
        trans = transforms.Normalize(t.statistics["mean"], t.statistics["std"])
        for split, split_name, exp in zip(t.datasets, t.split_names, exps):
            samples, labels = split.tensors
            task_labels = [t.id] * samples.size(0)
            if save_to_disk:
                exp_folder = folder / f"exp_{t_id}" / split_name
                exp_folder.mkdir(parents=True, exist_ok=True)
                files = []
                for i, (sample, label) in enumerate(zip(samples, labels)):
                    sample_path = exp_folder / f"sample_{i}.png"
                    # Files already on disk are reused, not rewritten.
                    if not sample_path.exists():
                        F.to_pil_image(sample).save(sample_path)
                    files.append((sample_path, label.item()))

                common_root, exp_paths_list = common_paths_root(files)
                paths_dataset = PathsDataset(common_root, exp_paths_list)
                # Images are read back from disk, so they are converted to
                # tensors before the normalization is applied.
                dataset = AvalancheDataset(
                    paths_dataset,
                    task_labels=task_labels,
                    transform=transforms.Compose(
                        [transforms.ToTensor(), trans]
                    ),
                )
            else:
                dataset = AvalancheTensorDataset(
                    samples,
                    labels.squeeze(1),
                    task_labels=task_labels,
                    transform=trans,
                )
            exp.append(dataset)

        # Only the `s_long` stream is truncated, after `n_tasks` tasks.
        if stream_name == "s_long" and t_id == n_tasks - 1:
            break

    return dataset_benchmark(
        train_datasets=exps[0],
        test_datasets=exps[2],
        other_streams_datasets=dict(val=exps[1]),
    )