    def test_nc_benchmark_transformations_basic(self):
        # Regression for #577
        ds = CIFAR100(root=expanduser("~") + "/.avalanche/data/cifar100/",
                      train=True,
                      download=True)
        ds = AvalancheDataset(ds, transform=ToTensor())

        scenario = nc_benchmark(ds,
                                ds,
                                n_experiences=10,
                                shuffle=True,
                                seed=1234,
                                task_labels=False)

        exp_0_dataset = scenario.train_stream[0].dataset
        self.assertIsInstance(exp_0_dataset[0][0], Tensor)
def _adapt_lazy_stream(generator, transform_groups, initial_transform_group,
                       dataset_type):
    """
    A simple internal utility to apply transforms and dataset type to all lazily
    generated datasets. Used in the :func:`create_lazy_generic_benchmark`
    benchmark creation helper.

    :return: A generator of datasets in which the proper transformation groups
        and dataset type are applied.
    """

    for dataset in generator:
        dataset = AvalancheDataset(
            dataset,
            transform_groups=transform_groups,
            initial_transform_group=initial_transform_group,
            dataset_type=dataset_type)
        yield dataset
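# A minimal driving sketch for _adapt_lazy_stream (illustrative only); it
# assumes torch, TensorDataset and AvalancheDatasetType are importable in
# this module's scope.
def _example_lazy_gen():
    for _ in range(3):
        yield TensorDataset(torch.rand(8, 3, 28, 28),
                            torch.randint(0, 5, (8,)))

_adapted_stream = _adapt_lazy_stream(
    _example_lazy_gen(),
    transform_groups=dict(train=(None, None), eval=(None, None)),
    initial_transform_group='train',
    dataset_type=AvalancheDatasetType.CLASSIFICATION)
# Each item yielded by _adapted_stream is now an AvalancheDataset.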
    def test_transform_dataset_composition(self):
        dataset_mnist = MNIST('./data/mnist', download=True,
                              transform=RandomCrop(16))
        x, y = dataset_mnist[0]
        self.assertIsInstance(x, Image)
        self.assertEqual([x.width, x.height], [16, 16])
        self.assertIsInstance(y, int)

        dataset = AvalancheDataset(
            dataset_mnist, transform=ToTensor(),
            target_transform=lambda target: -1)

        x2, y2, t2 = dataset[0]
        self.assertIsInstance(x2, Tensor)
        self.assertEqual(x2.shape, (1, 16, 16))
        self.assertIsInstance(y2, int)
        self.assertEqual(y2, -1)
        self.assertIsInstance(t2, int)
        self.assertEqual(0, t2)
    def test_avalanche_dataset_uniform_task_labels(self):
        dataset_mnist = MNIST('./data/mnist', download=True)
        x, y = dataset_mnist[0]
        dataset = AvalancheDataset(dataset_mnist, transform=ToTensor(),
                                   task_labels=[1] * len(dataset_mnist))
        x2, y2, t2 = dataset[0]

        self.assertIsInstance(x2, Tensor)
        self.assertIsInstance(y2, int)
        self.assertIsInstance(t2, int)
        self.assertEqual(1, t2)
        self.assertTrue(torch.equal(ToTensor()(x), x2))
        self.assertEqual(y, y2)

        self.assertListEqual([1] * len(dataset_mnist),
                             list(dataset.targets_task_labels))

        subset_task1 = dataset.task_set[1]
        self.assertIsInstance(subset_task1, AvalancheDataset)
        self.assertEqual(len(dataset), len(subset_task1))

        with self.assertRaises(KeyError):
            subset_task0 = dataset.task_set[0]
# Example 5
    def test_avalanche_dataset_from_chained_pytorch_subsets(self):
        tensor_x = torch.rand(500, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (500, ))

        whole_dataset = TensorDataset(tensor_x, tensor_y)

        subset1 = Subset(whole_dataset, indices=list(range(400, 500)))
        subset2 = Subset(subset1, indices=[5, 7, 0])

        dataset = AvalancheDataset(subset2)

        self.assertEqual(3, len(dataset))

        x, y, t = dataset[0]
        self.assertIsInstance(x, Tensor)
        self.assertTrue(torch.equal(tensor_x[405], x))
        self.assertTrue(torch.equal(tensor_y[405], y))
        self.assertEqual(0, t)

        self.assertTrue(
            torch.equal(
                torch.as_tensor(dataset.targets),
                torch.as_tensor([tensor_y[405], tensor_y[407],
                                 tensor_y[400]])))
# Example 6
def RotatedOmniglot(
        n_experiences: int,
        *,
        seed: Optional[int] = None,
        rotations_list: Optional[Sequence[int]] = None,
        train_transform: Optional[Any] = _default_omniglot_train_transform,
        eval_transform: Optional[Any] = _default_omniglot_eval_transform,
        dataset_root: Union[str, Path] = None) -> NCScenario:
    """
    Creates a Rotated Omniglot benchmark.

    If the dataset is not present in the computer, this method will
    automatically download and store it.

    Random angles are used to rotate the Omniglot images in ``n_experiences``
    different manners. This means that each experience is
    composed of all the original 964 Omniglot classes, but each image is
    rotated in a different way.

    The benchmark instance returned by this method will have two fields,
    `train_stream` and `test_stream`, which can be iterated to obtain
    training and test :class:`Experience`. Each Experience contains the
    `dataset` and the associated task label.

    A progressive task label, starting from "0", is applied to each experience.

    The benchmark API is quite simple and is uniform across all benchmark
    generators. It is recommended to check the tutorial of the "benchmark" API,
    which contains usage examples ranging from "basic" to "advanced".

    :param n_experiences: The number of experiences (tasks) in the current
        benchmark. It indicates how many different rotations of the Omniglot
        dataset have to be created.
    :param seed: A valid int used to initialize the random number generator.
        Can be None.
    :param rotations_list: A list of rotations values in degrees (from -180 to
        180) used to define the rotations. The rotation specified in position
        0 of the list will be applied to the task 0, the rotation specified in
        position 1 will be applied to task 1 and so on.
        If None, the value of ``seed`` will be used to define the rotations.
        If non-None, the ``seed`` parameter will be ignored.
        Defaults to None.
    :param train_transform: The transformation to apply to the training data
        after the random rotation, e.g. a random crop, a normalization or a
        concatenation of different transformations (see torchvision.transform
        documentation for a comprehensive list of possible transformations).
        If no transformation is passed, the default train transformation
        will be used.
    :param eval_transform: The transformation to apply to the test data
        after the random rotation, e.g. a random crop, a normalization or a
        concatenation of different transformations (see torchvision.transform
        documentation for a comprehensive list of possible transformations).
        If no transformation is passed, the default test transformation
        will be used.
    :param dataset_root: The root path of the dataset. Defaults to None, which
        means that the default location for 'omniglot' will be used.

    :returns: A properly initialized :class:`NCScenario` instance.
    """

    if rotations_list is None:
        rng_rotate = np.random.RandomState(seed)
        rotations_list = [rng_rotate.randint(-180, 181) for _ in range(
            n_experiences)]
    else:
        assert len(rotations_list) == n_experiences, "The number of rotations" \
                                               " should match the number" \
                                               " of incremental experiences."
    assert all(-180 <= rotations_list[i] <= 180
               for i in range(len(rotations_list))), "The value of a rotation" \
                                                     " should be between -180" \
                                                     " and 180 degrees."

    list_train_dataset = []
    list_test_dataset = []

    omniglot_train, omniglot_test = _get_omniglot_dataset(dataset_root)

    # for every incremental experience
    for experience in range(n_experiences):
        rotation_angle = rotations_list[experience]

        rotation = RandomRotation(degrees=(rotation_angle, rotation_angle))

        rotation_transforms = dict(
            train=(rotation, None),
            eval=(rotation, None)
        )

        # Freeze the rotation
        rotated_train = AvalancheDataset(
            omniglot_train,
            transform_groups=rotation_transforms,
            initial_transform_group='train').freeze_transforms()

        rotated_test = AvalancheDataset(
            omniglot_test,
            transform_groups=rotation_transforms,
            initial_transform_group='eval').freeze_transforms()

        list_train_dataset.append(rotated_train)
        list_test_dataset.append(rotated_test)

    return nc_benchmark(
        list_train_dataset,
        list_test_dataset,
        n_experiences=len(list_train_dataset),
        task_labels=True,
        shuffle=False,
        class_ids_from_zero_in_each_exp=True,
        one_dataset_per_exp=True,
        train_transform=train_transform,
        eval_transform=eval_transform)
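# A minimal usage sketch for RotatedOmniglot; the number of experiences and
# the seed below are illustrative values.
benchmark = RotatedOmniglot(n_experiences=4, seed=1234)
for exp_id, experience in enumerate(benchmark.train_stream):
    # each experience holds the full Omniglot train set, rotated differently
    print(exp_id, experience.task_label, len(experience.dataset))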
# Example 7
def PermutedOmniglot(
        n_experiences: int,
        *,
        seed: Optional[int] = None,
        train_transform: Optional[Any] = _default_omniglot_train_transform,
        eval_transform: Optional[Any] = _default_omniglot_eval_transform,
        dataset_root: Union[str, Path] = None) -> NCScenario:
    """
    Creates a Permuted Omniglot benchmark.

    If the dataset is not present in the computer, this method will
    automatically download and store it.

    Random pixel permutations are used to permute the Omniglot images in
    ``n_experiences`` different manners. This means that each experience is
    composed of all the original 964 Omniglot classes, but the pixels in the
    images are permuted in a different way.

    The benchmark instance returned by this method will have two fields,
    `train_stream` and `test_stream`, which can be iterated to obtain
    training and test :class:`Experience`. Each Experience contains the
    `dataset` and the associated task label.

    A progressive task label, starting from "0", is applied to each experience.

    The benchmark API is quite simple and is uniform across all benchmark
    generators. It is recommended to check the tutorial of the "benchmark" API,
    which contains usage examples ranging from "basic" to "advanced".

    :param n_experiences: The number of experiences (tasks) in the current
        benchmark. It indicates how many different permutations of the Omniglot
        dataset have to be created.
    :param seed: A valid int used to initialize the random number generator.
        Can be None.
    :param train_transform: The transformation to apply to the training data
        before the random permutation, e.g. a random crop, a normalization or a
        concatenation of different transformations (see torchvision.transform
        documentation for a comprehensive list of possible transformations).
        If no transformation is passed, the default train transformation
        will be used.
    :param eval_transform: The transformation to apply to the test data
        before the random permutation, e.g. a random crop, a normalization or a
        concatenation of different transformations (see torchvision.transform
        documentation for a comprehensive list of possible transformations).
        If no transformation is passed, the default test transformation
        will be used.
    :param dataset_root: The root path of the dataset. Defaults to None, which
        means that the default location for 'omniglot' will be used.

    :returns: A properly initialized :class:`NCScenario` instance.
    """

    list_train_dataset = []
    list_test_dataset = []
    rng_permute = np.random.RandomState(seed)

    omniglot_train, omniglot_test = _get_omniglot_dataset(dataset_root)

    # for every incremental experience
    for _ in range(n_experiences):
        # choose a random permutation of the pixels in the image
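        # (Omniglot images are 105x105 pixels, hence the 11025 indices)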
        idx_permute = torch.from_numpy(rng_permute.permutation(11025)).type(
            torch.int64)

        permutation = PixelsPermutation(idx_permute)

        permutation_transforms = dict(
            train=(permutation, None),
            eval=(permutation, None)
        )

        # Freeze the permutation
        permuted_train = AvalancheDataset(
            omniglot_train,
            transform_groups=permutation_transforms,
            initial_transform_group='train').freeze_transforms()

        permuted_test = AvalancheDataset(
            omniglot_test,
            transform_groups=permutation_transforms,
            initial_transform_group='eval').freeze_transforms()

        list_train_dataset.append(permuted_train)
        list_test_dataset.append(permuted_test)

    return nc_benchmark(
        list_train_dataset,
        list_test_dataset,
        n_experiences=len(list_train_dataset),
        task_labels=True,
        shuffle=False,
        class_ids_from_zero_in_each_exp=True,
        one_dataset_per_exp=True,
        train_transform=train_transform,
        eval_transform=eval_transform)
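# A minimal usage sketch for PermutedOmniglot (illustrative values); the
# returned benchmark is used in the same way as the RotatedOmniglot one above.
benchmark = PermutedOmniglot(n_experiences=5, seed=1234)
first_experience = benchmark.train_stream[0]
print(len(first_experience.dataset))  # full Omniglot train set, pixels permuted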
# Example 8
def create_multi_dataset_generic_scenario(
    train_dataset_list: Sequence[SupportedDataset],
    test_dataset_list: Sequence[SupportedDataset],
    task_labels: Sequence[int],
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    dataset_type: AvalancheDatasetType = None,
) -> GenericCLScenario:
    """
    This helper function is DEPRECATED in favor of
    `create_multi_dataset_generic_benchmark`.

    Creates a generic scenario given a list of datasets and the respective task
    labels. Each training dataset will be considered as a separate training
    experience. Contents of the datasets will not be changed, including the
    targets.

    When loading the datasets from a set of fixed filelist, consider using
    the :func:`create_generic_scenario_from_filelists` helper method instead.

    In its base form, this function accepts a list of test datasets that must
    contain the same number of datasets as the training list.
    Those pairs are then used to create the "past", "cumulative"
    (a.k.a. growing) and "future" test sets. However, in certain Continual
    Learning scenarios only the concept of "complete" test set makes sense. In
    that case, the ``complete_test_set_only`` should be set to True (see the
    parameter description for more info).

    Beware that pattern transformations must already be included in the
    datasets (when needed).

    :param train_dataset_list: A list of training datasets.
    :param test_dataset_list: A list of test datasets.
    :param task_labels: A list of task labels. Must contain the same amount of
        elements of the ``train_dataset_list`` parameter. For
        Single-Incremental-Task (a.k.a. Task-Free) scenarios, this is usually
        a list of zeros. For Multi Task scenario, this is usually a list of
        ascending task labels (starting from 0).
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the scenario. This means that the ``test_dataset_list``
        parameter must be a list with a single element (the complete test set).
        Defaults to False, which means that ``train_dataset_list`` and
        ``test_dataset_list`` must contain the same amount of datasets.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param dataset_type: The type of the dataset. Defaults to None, which
        means that the type will be obtained from the input datasets. If input
        datasets are not instances of :class:`AvalancheDataset`, the type
        UNDEFINED will be used.

    :returns: A :class:`GenericCLScenario` instance.
    """

    warnings.warn(
        "create_multi_dataset_generic_scenario is deprecated in favor"
        " of create_multi_dataset_generic_benchmark.",
        DeprecationWarning,
    )

    transform_groups = dict(
        train=(train_transform, train_target_transform),
        eval=(eval_transform, eval_target_transform),
    )

    if complete_test_set_only:
        if len(test_dataset_list) != 1:
            raise ValueError("Test must contain 1 element when"
                             "complete_test_set_only is True")
    else:
        if len(test_dataset_list) != len(train_dataset_list):
            raise ValueError("Train and test lists must define the same "
                             " amount of experiences")

    train_t_labels = []
    train_dataset_list = list(train_dataset_list)
    for dataset_idx in range(len(train_dataset_list)):
        dataset = train_dataset_list[dataset_idx]
        train_t_labels.append(task_labels[dataset_idx])
        train_dataset_list[dataset_idx] = AvalancheDataset(
            dataset,
            task_labels=ConstantSequence(task_labels[dataset_idx],
                                         len(dataset)),
            transform_groups=transform_groups,
            initial_transform_group="train",
            dataset_type=dataset_type,
        )

    test_t_labels = []
    test_dataset_list = list(test_dataset_list)
    for dataset_idx in range(len(test_dataset_list)):
        dataset = test_dataset_list[dataset_idx]

        test_t_label = task_labels[dataset_idx]
        if complete_test_set_only:
            test_t_label = 0

        test_t_labels.append(test_t_label)

        test_dataset_list[dataset_idx] = AvalancheDataset(
            dataset,
            task_labels=ConstantSequence(test_t_label, len(dataset)),
            transform_groups=transform_groups,
            initial_transform_group="eval",
            dataset_type=dataset_type,
        )

    return GenericCLScenario(
        stream_definitions={
            "train": (train_dataset_list, train_t_labels),
            "test": (test_dataset_list, test_t_labels),
        },
        complete_test_set_only=complete_test_set_only,
    )
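# A hedged sketch of the deprecated helper on in-memory tensor datasets
# (shapes, class counts and task labels below are illustrative); new code
# should call create_multi_dataset_generic_benchmark instead.
_train_sets = [TensorDataset(torch.rand(20, 3, 32, 32),
                             torch.randint(0, 10, (20,)))
               for _ in range(2)]
_test_sets = [TensorDataset(torch.rand(10, 3, 32, 32),
                            torch.randint(0, 10, (10,)))
              for _ in range(2)]
_scenario = create_multi_dataset_generic_scenario(
    _train_sets, _test_sets, task_labels=[0, 1])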
def create_generic_benchmark_from_paths(
    train_lists_of_files: Sequence[Sequence[FileAndLabel]],
    test_lists_of_files: Union[
        Sequence[FileAndLabel], Sequence[Sequence[FileAndLabel]]
    ],
    *,
    other_streams_lists_of_files: Dict[
        str, Sequence[Sequence[FileAndLabel]]
    ] = None,
    task_labels: Sequence[int],
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    other_streams_transforms: Dict[str, Tuple[Any, Any]] = None,
    dataset_type: AvalancheDatasetType = AvalancheDatasetType.UNDEFINED
) -> GenericCLScenario:
    """
    Creates a benchmark instance given a sequence of lists of files. A separate
    dataset will be created for each list. Each of those datasets
    will be considered a separate experience.

    This is very similar to :func:`create_generic_benchmark_from_filelists`,
    with the main difference being that
    :func:`create_generic_benchmark_from_filelists` accepts, for each
    experience, a file list formatted in Caffe-style. In contrast, this function
    accepts a list of tuples where each tuple contains two elements: the full
    path to the pattern and its label. Optionally, the tuple may contain a third
    element describing the bounding box of the element to crop. This last
    bounding box may be useful when trying to extract the part of the image
    depicting the desired element.

    Apart from that, the same limitations of
    :func:`create_generic_benchmark_from_filelists` regarding task labels apply.

    The label of each pattern doesn't have to be an int. Also, a dataset type
    can be defined.

    :param train_lists_of_files: A list of lists. Each list describes the paths
        and labels of patterns to include in that training experience, as
        tuples. Each tuple must contain two elements: the full path to the
        pattern and its class label. Optionally, the tuple may contain a
        third element describing the bounding box to use for cropping (top,
        left, height, width).
    :param test_lists_of_files: A list of lists. Each list describes the paths
        and labels of patterns to include in that test experience, as tuples.
        Each tuple must contain two elements: the full path to the pattern
        and its class label. Optionally, the tuple may contain a third element
        describing the bounding box to use for cropping (top, left, height,
        width).
    :param other_streams_lists_of_files: A dictionary describing the content of
        custom streams. Keys must be valid stream names (letters and numbers,
        not starting with a number) while the values follow the same structure
        as the `train_lists_of_files` and `test_lists_of_files` parameters. If
        this dictionary contains the definition for "train" or "test" streams,
        those definitions will override the `train_lists_of_files` and
        `test_lists_of_files` parameters.
    :param task_labels: A list of task labels. Must contain at least a value
        for each experience. Each value describes the task label that will be
        applied to all patterns of a certain experience. For more info on that,
        see the function description.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark. This means that the ``test_lists_of_files``
        parameter must define a single experience (the complete test set).
        Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams. If no transformations are defined for a custom stream,
        then "train" transformations will be used. This parameter must be a
        dictionary mapping stream names to transformations. The transformations
        must be a two elements tuple where the first element defines the
        X transformation while the second element is the Y transformation.
        Those elements can be None. If this dictionary contains the
        transformations for "train" or "test" streams then those transformations
        will override the `train_transform`, `train_target_transform`,
        `eval_transform` and `eval_target_transform` parameters.
    :param dataset_type: The type of the dataset. Defaults to UNDEFINED.

    :returns: A :class:`GenericCLScenario` instance.
    """

    input_streams = dict(train=train_lists_of_files, test=test_lists_of_files)

    if other_streams_lists_of_files is not None:
        input_streams = {**input_streams, **other_streams_lists_of_files}

    stream_definitions = dict()

    for stream_name, lists_of_files in input_streams.items():
        stream_datasets = []
        for exp_id, list_of_files in enumerate(lists_of_files):
            common_root, exp_paths_list = common_paths_root(list_of_files)
            paths_dataset = PathsDataset(common_root, exp_paths_list)
            stream_datasets.append(
                AvalancheDataset(paths_dataset, task_labels=task_labels[exp_id])
            )

        stream_definitions[stream_name] = stream_datasets

    return create_multi_dataset_generic_benchmark(
        [],
        [],
        other_streams_datasets=stream_definitions,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        other_streams_transforms=other_streams_transforms,
        dataset_type=dataset_type,
    )
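# A hedged usage sketch; the file paths and labels below are placeholders.
# Each experience is described by a list of (path, label) tuples, with an
# optional (top, left, height, width) bounding box as a third element.
_train_exp_0 = [('/data/train/img_0.png', 0), ('/data/train/img_1.png', 1)]
_test_exp_0 = [('/data/test/img_0.png', 0)]
_benchmark = create_generic_benchmark_from_paths(
    [_train_exp_0],
    [_test_exp_0],
    task_labels=[0],
    train_transform=ToTensor(),
    eval_transform=ToTensor())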
def create_multi_dataset_generic_benchmark(
    train_datasets: Sequence[SupportedDataset],
    test_datasets: Sequence[SupportedDataset],
    *,
    other_streams_datasets: Dict[str, Sequence[SupportedDataset]] = None,
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    other_streams_transforms: Dict[str, Tuple[Any, Any]] = None,
    dataset_type: AvalancheDatasetType = None
) -> GenericCLScenario:
    """
    Creates a benchmark instance given a list of datasets. Each dataset will be
    considered as a separate experience.

    Contents of the datasets must already be set, including task labels.
    Transformations will be applied if defined.

    This function allows for the creation of custom streams as well.
    While "train" and "test" datasets must always be set, the experience list
    for other streams can be defined by using the `other_streams_datasets`
    parameter.

    If transformations are defined, they will be applied to the datasets
    of the related stream.

    :param train_datasets: A list of training datasets.
    :param test_datasets: A list of test datasets.
    :param other_streams_datasets: A dictionary describing the content of custom
        streams. Keys must be valid stream names (letters and numbers,
        not starting with a number) while the value must be a list of datasets.
        If this dictionary contains the definition for "train" or "test"
        streams, those definitions will override the `train_datasets` and
        `test_datasets` parameters.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark. This means that the ``test_datasets``
        parameter must be a list with a single element (the complete test set).
        Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams. If no transformations are defined for a custom stream,
        then "train" transformations will be used. This parameter must be a
        dictionary mapping stream names to transformations. The transformations
        must be a two elements tuple where the first element defines the
        X transformation while the second element is the Y transformation.
        Those elements can be None. If this dictionary contains the
        transformations for "train" or "test" streams then those transformations
        will override the `train_transform`, `train_target_transform`,
        `eval_transform` and `eval_target_transform` parameters.
    :param dataset_type: The type of the dataset. Defaults to None, which
        means that the type will be obtained from the input datasets. If input
        datasets are not instances of :class:`AvalancheDataset`, the type
        UNDEFINED will be used.

    :returns: A :class:`GenericCLScenario` instance.
    """

    transform_groups = dict(
        train=(train_transform, train_target_transform),
        eval=(eval_transform, eval_target_transform),
    )

    if other_streams_transforms is not None:
        for stream_name, stream_transforms in other_streams_transforms.items():
            if isinstance(stream_transforms, Sequence):
                if len(stream_transforms) == 1:
                    # Suppose we got only the transformation for X values
                    stream_transforms = (stream_transforms[0], None)
            else:
                # Suppose it's the transformation for X values
                stream_transforms = (stream_transforms, None)

            transform_groups[stream_name] = stream_transforms

    input_streams = dict(train=train_datasets, test=test_datasets)

    if other_streams_datasets is not None:
        input_streams = {**input_streams, **other_streams_datasets}

    if complete_test_set_only:
        if len(input_streams["test"]) != 1:
            raise ValueError(
                "Test stream must contain one experience when"
                "complete_test_set_only is True"
            )

    stream_definitions = dict()

    for stream_name, dataset_list in input_streams.items():
        initial_transform_group = "train"
        if stream_name in transform_groups:
            initial_transform_group = stream_name

        stream_datasets = []
        for dataset_idx in range(len(dataset_list)):
            dataset = dataset_list[dataset_idx]
            stream_datasets.append(
                AvalancheDataset(
                    dataset,
                    transform_groups=transform_groups,
                    initial_transform_group=initial_transform_group,
                    dataset_type=dataset_type,
                )
            )
        stream_definitions[stream_name] = (stream_datasets,)

    return GenericCLScenario(
        stream_definitions=stream_definitions,
        complete_test_set_only=complete_test_set_only,
    )
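# A hedged sketch: datasets must already carry task labels, so plain
# TensorDatasets are wrapped in AvalancheDataset first. The extra "valid"
# stream illustrates the other_streams_datasets mechanism; all names and
# shapes below are illustrative.
def _make_labeled_set(n_patterns):
    base = TensorDataset(torch.rand(n_patterns, 1, 28, 28),
                         torch.randint(0, 4, (n_patterns,)))
    return AvalancheDataset(base, task_labels=[0] * n_patterns)

_benchmark = create_multi_dataset_generic_benchmark(
    train_datasets=[_make_labeled_set(16)],
    test_datasets=[_make_labeled_set(8)],
    other_streams_datasets=dict(valid=[_make_labeled_set(8)]))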
def create_generic_benchmark_from_filelists(
    root: Optional[Union[str, Path]],
    train_file_lists: Sequence[Union[str, Path]],
    test_file_lists: Sequence[Union[str, Path]],
    *,
    other_streams_file_lists: Dict[str, Sequence[Union[str, Path]]] = None,
    task_labels: Sequence[int],
    complete_test_set_only: bool = False,
    train_transform=None,
    train_target_transform=None,
    eval_transform=None,
    eval_target_transform=None,
    other_streams_transforms: Dict[str, Tuple[Any, Any]] = None
) -> GenericCLScenario:
    """
    Creates a benchmark instance given a list of filelists and the respective
    task labels. A separate dataset will be created for each filelist and each
    of those datasets will be considered a separate experience.

    This helper function is the best choice when loading Caffe-style datasets
    based on filelists.

    Beware that this helper function is limited in the following two aspects:

    - The resulting benchmark instance and the intermediate datasets used to
      populate it will be of type CLASSIFICATION. There is no way to change
      this.
    - Task labels can only be defined by choosing a single task label for
      each experience (the same task label is applied to all patterns of
      experiences sharing the same position in different streams).

    Despite those constraints, this helper function is usually sufficiently
    powerful to cover most continual learning benchmarks based on file lists.

    If you need to create a similar benchmark instance starting from an
    in-memory list of paths, the similar helper function
    :func:`create_generic_benchmark_from_paths` can be used.

    If task labels must be defined in a more fine-grained way, consider using
    :func:`create_multi_dataset_generic_benchmark` by passing properly
    initialized :class:`AvalancheDataset` instances.

    :param root: The root path of the dataset. Can be None.
    :param train_file_lists: A list of filelists describing the
        paths of the training patterns for each experience.
    :param test_file_lists: A list of filelists describing the
        paths of the test patterns for each experience.
    :param other_streams_file_lists: A dictionary describing the content of
        custom streams. Keys must be valid stream names (letters and numbers,
        not starting with a number) while the value must be a list of filelists
        (same as `train_file_lists` and `test_file_lists` parameters). If this
        dictionary contains the definition for "train" or "test" streams,
        those definitions will override the `train_file_lists` and
        `test_file_lists` parameters.
    :param task_labels: A list of task labels. Must contain at least a value
        for each experience. Each value describes the task label that will be
        applied to all patterns of a certain experience. For more info on that,
        see the function description.
    :param complete_test_set_only: If True, only the complete test set will
        be returned by the benchmark. This means that the ``test_file_lists``
        parameter must be a list with a single element (the complete test set).
        Alternatively, ``test_file_lists`` can be a plain string or
        :class:`Path` object in this case. Defaults to False.
    :param train_transform: The transformation to apply to the training data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param eval_transform: The transformation to apply to the test data,
        e.g. a random crop, a normalization or a concatenation of different
        transformations (see torchvision.transform documentation for a
        comprehensive list of possible transformations). Defaults to None.
    :param eval_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.
    :param other_streams_transforms: Transformations to apply to custom
        streams. If no transformations are defined for a custom stream,
        then "train" transformations will be used. This parameter must be a
        dictionary mapping stream names to transformations. The transformations
        must be a two elements tuple where the first element defines the
        X transformation while the second element is the Y transformation.
        Those elements can be None. If this dictionary contains the
        transformations for "train" or "test" streams then those transformations
        will override the `train_transform`, `train_target_transform`,
        `eval_transform` and `eval_target_transform` parameters.

    :returns: A :class:`GenericCLScenario` instance.
    """

    input_streams = dict(train=train_file_lists, test=test_file_lists)

    if other_streams_file_lists is not None:
        input_streams = {**input_streams, **other_streams_file_lists}

    stream_definitions = dict()

    for stream_name, file_lists in input_streams.items():
        stream_datasets = []
        for exp_id, f_list in enumerate(file_lists):

            f_list_dataset = FilelistDataset(root, f_list)
            stream_datasets.append(
                AvalancheDataset(
                    f_list_dataset, task_labels=task_labels[exp_id]
                )
            )

        stream_definitions[stream_name] = stream_datasets

    return create_multi_dataset_generic_benchmark(
        [],
        [],
        other_streams_datasets=stream_definitions,
        train_transform=train_transform,
        train_target_transform=train_target_transform,
        eval_transform=eval_transform,
        eval_target_transform=eval_target_transform,
        complete_test_set_only=complete_test_set_only,
        other_streams_transforms=other_streams_transforms,
        dataset_type=AvalancheDatasetType.CLASSIFICATION,
    )
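# A hedged usage sketch; the filelist paths are placeholders. Each filelist is
# a Caffe-style text file whose lines read "relative/path.png <label>".
_benchmark = create_generic_benchmark_from_filelists(
    '/data/root',
    ['train_exp_0.txt', 'train_exp_1.txt'],
    ['test_exp_0.txt', 'test_exp_1.txt'],
    task_labels=[0, 1],
    train_transform=ToTensor(),
    eval_transform=ToTensor())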
# Example 12
    def __init__(self: TGenericCLScenario,
                 original_train_dataset: TrainSet,
                 original_test_dataset: TestSet,
                 train_dataset: AvalancheDataset,
                 test_dataset: AvalancheDataset,
                 train_exps_patterns_assignment: Sequence[Sequence[int]],
                 test_exps_patterns_assignment: Sequence[Sequence[int]],
                 task_labels: Sequence[List[int]],
                 pattern_train_task_labels: Sequence[int],
                 pattern_test_task_labels: Sequence[int],
                 complete_test_set_only: bool = False,
                 reproducibility_data: Optional[Dict[str, Any]] = None,
                 experience_factory: Callable[['GenericScenarioStream', int],
                                              TExperience] = None):
        """
        Creates an instance of a Continual Learning scenario.

        The scenario is defined by the train and test datasets plus the
        assignment of patterns to experiences (batches/tasks).

        :param train_dataset: The training dataset. The dataset must be a
            subclass of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``train_dataset=AvalancheDataset(torchvision_dataset)``.
        :param test_dataset: The test dataset. The dataset must be a
            subclass of :class:`AvalancheDataset`. For instance, one can
            use the datasets from the torchvision package like that:
            ``test_dataset=AvalancheDataset(torchvision_dataset)``.
        :param train_exps_patterns_assignment: A list of experiences. Each
            experience is in turn defined by a list of integers describing the
            pattern index inside the training dataset.
        :param test_exps_patterns_assignment: A list of experiences. Each
            experience is in turn defined by a list of integers describing the
            pattern index inside the test dataset.
        :param task_labels: The mapping from experience IDs to task labels,
            usually as a list of integers.
        :param pattern_train_task_labels: The list of task labels of each
            pattern in the `train_dataset`.
        :param pattern_test_task_labels: The list of task labels of each
            pattern in the `test_dataset`.
        :param complete_test_set_only: If True, only the complete test
            set will be returned from test set related methods of the linked
            :class:`GenericExperience` instances. This also means that the
            ``test_exps_patterns_assignment`` parameter can be a single element
            or even an empty list (in which case, the full set defined by
            the ``test_dataset`` parameter will be returned). The returned
            task label for the complete test set will be the first element
            of the ``task_labels`` parameter. Defaults to False, which means
            that ``train_exps_patterns_assignment`` and
            ``test_exps_patterns_assignment`` parameters must describe an equal
            amount of experiences.
        :param reproducibility_data: If not None, overrides the
            ``train/test_exps_patterns_assignment`` and ``task_labels``
            parameters. This is usually a dictionary containing data used to
            reproduce a specific experiment. One can use the
            ``get_reproducibility_data`` method to get (and even distribute)
            the experiment setup so that it can be loaded by passing it as this
            parameter. In this way one can be sure that the same specific
            experimental setup is being used (for reproducibility purposes).
            Beware that, in order to reproduce an experiment, the same train and
            test datasets must be used. Defaults to None.
        :param experience_factory: If not None, a callable that, given the
            scenario instance and the experience ID, returns an experience
            instance. This parameter is usually used in subclasses (when
            invoking the super constructor) to specialize the experience class.
            Defaults to None, which means that the :class:`GenericExperience`
            constructor will be used.
        """

        self.original_train_dataset: TrainSet = original_train_dataset
        """ The original training set. """

        self.original_test_dataset: TestSet = original_test_dataset
        """ The original test set. """

        self.train_exps_patterns_assignment: Sequence[Sequence[int]]
        """ A list containing which training patterns are assigned to each 
        experience. Patterns are identified by their id w.r.t. the dataset found
        in the train_dataset field. """

        self.test_exps_patterns_assignment: Sequence[Sequence[int]]
        """ A list containing which test patterns are assigned to each
        experience. Patterns are identified by their id w.r.t. the dataset found
        in the test_dataset field """

        self.task_labels: Sequence[List[int]] = task_labels
        """ The task label of each experience. """

        self.pattern_train_task_labels: Sequence[int] = \
            pattern_train_task_labels
        """ The task label of each pattern in the training dataset. """

        self.pattern_test_task_labels: Sequence[int] = pattern_test_task_labels
        """ The task label of each pattern in the test dataset. """

        self.train_exps_patterns_assignment: Sequence[Sequence[int]] = \
            train_exps_patterns_assignment
        self.test_exps_patterns_assignment: Sequence[Sequence[int]] = \
            test_exps_patterns_assignment

        self.complete_test_set_only: bool = bool(complete_test_set_only)
        """
        If True, only the complete test set will be returned from experience
        instances.
        
        This flag is usually set to True in scenarios where having one separate
        test set aligned to each training experience is impossible or doesn't
        make sense from a semantic point of view.
        """

        if reproducibility_data is not None:
            self.train_exps_patterns_assignment = reproducibility_data['train']
            self.test_exps_patterns_assignment = reproducibility_data['test']
            self.task_labels = reproducibility_data['task_labels']
            self.pattern_train_task_labels = reproducibility_data[
                'pattern_train_task_labels']
            self.pattern_test_task_labels = reproducibility_data[
                'pattern_test_task_labels']
            self.complete_test_set_only = \
                reproducibility_data['complete_test_only']

        self.n_experiences: int = len(self.train_exps_patterns_assignment)
        """  The number of incremental experiences this scenario is made of. """

        if experience_factory is None:
            experience_factory = GenericExperience

        self.experience_factory: Callable[[TGenericScenarioStream, int],
                                          TExperience] = experience_factory

        if self.complete_test_set_only:
            if len(self.test_exps_patterns_assignment) > 1:
                raise ValueError(
                    'complete_test_set_only is True, but '
                    'test_exps_patterns_assignment contains more than one '
                    'element')
        elif len(self.train_exps_patterns_assignment) != \
                len(self.test_exps_patterns_assignment):
            raise ValueError('There must be the same amount of train and '
                             'test experiences')

        if len(self.train_exps_patterns_assignment) != len(self.task_labels):
            raise ValueError('There must be the same number of train '
                             'experiences and task labels')

        self.train_dataset: AvalancheDataset = AvalancheDataset(
            train_dataset, task_labels=self.pattern_train_task_labels)
        """ The training set used to generate the incremental experiences. """

        self.test_dataset: AvalancheDataset = AvalancheDataset(
            test_dataset, task_labels=self.pattern_test_task_labels)
        """ The test set used to generate the incremental experiences. """

        self.train_stream: GenericScenarioStream[
            TExperience,
            TGenericCLScenario] = GenericScenarioStream('train', self)
        """
        The stream used to obtain the training experiences. 
        This stream can be sliced in order to obtain a subset of this stream.
        """

        self.test_stream: GenericScenarioStream[
            TExperience,
            TGenericCLScenario] = GenericScenarioStream('test', self)
        """
Exemplo n.º 13
0
def CTrL(
    stream_name: str,
    save_to_disk: bool = False,
    path: Path = default_dataset_location(""),
    seed: int = None,
    n_tasks: int = None,
):
    """
    Gives access to the Continual Transfer Learning benchmark streams
    introduced in https://arxiv.org/abs/2012.12631.

    :param stream_name: Name of the stream to generate. Must be one of
        `s_plus`, `s_minus`, `s_in`, `s_out` and `s_pl`.
    :param save_to_disk: Whether to save each stream on disk or load
        everything in memory. Setting it to `True` will save memory but takes
        more time on the first generation using the corresponding seed.
    :param path: The path under which the generated stream will be saved if
        save_to_disk is True.
    :param seed: The seed to use to generate the streams. If no seed is given,
        a random one will be drawn, so that the generated stream can still be
        reproduced by reusing that seed.
    :param n_tasks: The number of tasks to generate. This parameter is only
        relevant for the `s_long` stream, as all other streams have a fixed
        number of tasks.
    :return: A scenario containing 3 streams: train, val and test.
    """
    seed = seed if seed is not None else random.randint(0, sys.maxsize)
    if stream_name != "s_long" and n_tasks is not None:
        raise ValueError("The n_tasks parameter can only be used with the "
                         f'"s_long" stream, asked {n_tasks} for {stream_name}')
    elif stream_name == "s_long" and n_tasks is None:
        n_tasks = 100

    stream = ctrl.get_stream(stream_name, seed)

    if save_to_disk:
        folder = path / "ctrl" / stream_name / f"seed_{seed}"

    # Train, val and test experiences
    exps = [[], [], []]
    for t_id, t in enumerate(tqdm(stream, desc=f"Loading {stream_name}")):
        trans = transforms.Normalize(t.statistics["mean"], t.statistics["std"])
        for split, split_name, exp in zip(t.datasets, t.split_names, exps):
            samples, labels = split.tensors
            task_labels = [t.id] * samples.size(0)
            if save_to_disk:
                exp_folder = folder / f"exp_{t_id}" / split_name
                exp_folder.mkdir(parents=True, exist_ok=True)
                files = []
                for i, (sample, label) in enumerate(zip(samples, labels)):
                    sample_path = exp_folder / f"sample_{i}.png"
                    if not sample_path.exists():
                        F.to_pil_image(sample).save(sample_path)
                    files.append((sample_path, label.item()))

                common_root, exp_paths_list = common_paths_root(files)
                paths_dataset = PathsDataset(common_root, exp_paths_list)
                dataset = AvalancheDataset(
                    paths_dataset,
                    task_labels=task_labels,
                    transform=transforms.Compose(
                        [transforms.ToTensor(), trans]),
                )
            else:
                dataset = AvalancheTensorDataset(
                    samples,
                    labels.squeeze(1),
                    task_labels=task_labels,
                    transform=trans,
                )
            exp.append(dataset)
        if stream_name == "s_long" and t_id == n_tasks - 1:
            break

    return dataset_benchmark(
        train_datasets=exps[0],
        test_datasets=exps[2],
        other_streams_datasets=dict(val=exps[1]),
    )
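# A minimal usage sketch for CTrL; the stream name and seed are illustrative.
# The returned object is a standard benchmark, so its train stream can be
# iterated like the other benchmarks in this file.
_benchmark = CTrL(stream_name='s_plus', seed=1234)
for _experience in _benchmark.train_stream:
    print(_experience.task_label, len(_experience.dataset))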
# Example 14
def run_experiment(config):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    np.random.seed(config.seed)
    random.seed(config.seed)
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.deterministic = True

    per_pixel_mean = get_dataset_per_pixel_mean(
        CIFAR100(
            expanduser("~") + "/.avalanche/data/cifar100/",
            train=True,
            download=True,
            transform=transforms.Compose([transforms.ToTensor()]),
        )
    )

    transforms_group = dict(
        eval=(
            transforms.Compose(
                [
                    transforms.ToTensor(),
                    lambda img_pattern: img_pattern - per_pixel_mean,
                ]
            ),
            None,
        ),
        train=(
            transforms.Compose(
                [
                    transforms.ToTensor(),
                    lambda img_pattern: img_pattern - per_pixel_mean,
                    icarl_cifar100_augment_data,
                ]
            ),
            None,
        ),
    )

    train_set = CIFAR100(
        expanduser("~") + "/.avalanche/data/cifar100/",
        train=True,
        download=True,
    )
    test_set = CIFAR100(
        expanduser("~") + "/.avalanche/data/cifar100/",
        train=False,
        download=True,
    )

    train_set = AvalancheDataset(
        train_set,
        transform_groups=transforms_group,
        initial_transform_group="train",
    )
    test_set = AvalancheDataset(
        test_set,
        transform_groups=transforms_group,
        initial_transform_group="eval",
    )

    scenario = nc_benchmark(
        train_dataset=train_set,
        test_dataset=test_set,
        n_experiences=config.nb_exp,
        task_labels=False,
        seed=config.seed,
        shuffle=False,
        fixed_class_order=config.fixed_class_order,
    )

    evaluator = EvaluationPlugin(
        EpochAccuracy(),
        ExperienceAccuracy(),
        StreamAccuracy(),
        loggers=[InteractiveLogger()],
    )

    model: IcarlNet = make_icarl_net(num_classes=100)
    model.apply(initialize_icarl_net)

    optim = SGD(
        model.parameters(),
        lr=config.lr_base,
        weight_decay=config.wght_decay,
        momentum=0.9,
    )
    sched = LRSchedulerPlugin(
        MultiStepLR(optim, config.lr_milestones, gamma=1.0 / config.lr_factor)
    )

    strategy = ICaRL(
        model.feature_extractor,
        model.classifier,
        optim,
        config.memory_size,
        buffer_transform=transforms.Compose([icarl_cifar100_augment_data]),
        fixed_memory=True,
        train_mb_size=config.batch_size,
        train_epochs=config.epochs,
        eval_mb_size=config.batch_size,
        plugins=[sched],
        device=device,
        evaluator=evaluator,
    )

    for i, exp in enumerate(scenario.train_stream):
        eval_exps = [e for e in scenario.test_stream][: i + 1]
        strategy.train(exp, num_workers=4)
        strategy.eval(eval_exps, num_workers=4)
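# run_experiment only reads config attributes, so a SimpleNamespace carrying
# the fields accessed above is enough to drive it; every value below is
# illustrative, not a hyper-parameter of the original experiment.
from types import SimpleNamespace

_config = SimpleNamespace(
    seed=1234,
    nb_exp=10,
    fixed_class_order=list(range(100)),
    lr_base=2.0,
    wght_decay=1e-5,
    lr_milestones=[49, 63],
    lr_factor=5.0,
    memory_size=2000,
    batch_size=128,
    epochs=70,
)
# run_experiment(_config)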
# Example 15
def datasets_from_paths(
        train_list, test_list, complete_test_set_only=False,
        train_transform=None, train_target_transform=None,
        test_transform=None, test_target_transform=None):
    """
    This utility takes, for each dataset to generate, a list of tuples each
    containing two elements: the full path to the pattern and its class label.
    Optionally, the tuple may contain a third element describing the bounding
    box to use for cropping.

    This is equivalent to `datasets_from_filelists`, whose description
    contains more details on the behaviour of this utility. The two utilities
    differ in that `datasets_from_filelists` accepts paths to Caffe-style
    filelists while this one is able to create the datasets from an in-memory
    list.

    Note: this utility may try to detect (and strip) the common root path of
    all patterns in order to save some RAM.

    :param train_list: list of lists. Each list must contain tuples of two
        elements: the full path to the pattern and its class label. Optionally,
        the tuple may contain a third element describing the bounding box to use
        for cropping (top, left, height, width).
    :param test_list: list of lists. Each list must contain tuples of two
        elements: the full path to the pattern and its class label. Optionally,
        the tuple may contain a third element describing the bounding box to use
        for cropping (top, left, height, width). It can be also a single list
        when the test dataset is the same for each experience.
    :param complete_test_set_only: if True, test_list must contain a single list
        that will serve as the complete test set. If False, train_list and
        test_list must describe the same amount of datasets. Defaults to False.
    :param train_transform: The transformation to apply to training patterns.
        Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :return: A tuple containing two lists: the train datasets and the test
        datasets.
    """

    if complete_test_set_only:
        # Check if the single dataset was passed as [Tuple1, Tuple2, ...]
        # or as [[Tuple1, Tuple2, ...]]
        if not isinstance(test_list[0], Tuple):
            if len(test_list) > 1:
                raise ValueError(
                    'When complete_test_set_only is True, test_list must '
                    'be a single list of tuples or a nested list containing '
                    'a single list of tuples')
            else:
                test_list = test_list[0]
        else:
            test_list = [test_list]
    else:
        if len(test_list) != len(train_list):
            raise ValueError(
                'When complete_test_set_only is False, test_list and '
                'train_list must contain the same number of elements.')

    transform_groups = dict(train=(train_transform, train_target_transform),
                            eval=(test_transform, test_target_transform))

    common_root = None

    # Detect common root
    try:
        all_paths = [pattern_tuple[0] for exp_list in train_list
                     for pattern_tuple in exp_list] + \
                    [pattern_tuple[0] for exp_list in test_list
                     for pattern_tuple in exp_list]

        common_root = os.path.commonpath(all_paths)
    except ValueError:
        # commonpath may throw a ValueError in different situations!
        # See the official documentation for more details
        pass

    if common_root is not None and len(common_root) > 0 and \
            common_root != '/':
        has_common_root = True
        common_root = str(common_root)
    else:
        has_common_root = False
        common_root = None

    if has_common_root:
        # print(f'Common root found: {common_root}!')
        # All paths have a common filesystem root
        # Remove it from all paths!
        single_path_case = False
        tr_list = list()
        te_list = list()

        for idx_exp_list in range(len(train_list)):
            if single_path_case:
                break
            st_list = list()
            for x in train_list[idx_exp_list]:
                rel = os.path.relpath(x[0], common_root)
                if len(rel) == 0 or rel == '.':
                    # May happen if the dataset has a single path
                    single_path_case = True
                    break
                st_list.append((rel, *x[1:]))
            tr_list.append(st_list)

        for idx_exp_list in range(len(test_list)):
            if single_path_case:
                break
            st_list = list()
            for x in test_list[idx_exp_list]:
                rel = os.path.relpath(x[0], common_root)
                if len(rel) == 0 or rel == '.':
                    # May happen if the dataset has a single path
                    single_path_case = True
                    break
                st_list.append((rel, *x[1:]))
            te_list.append(st_list)
        if not single_path_case:
            train_list = tr_list
            test_list = te_list
        else:
            has_common_root = False
            common_root = None

    train_inc_datasets = \
        [AvalancheDataset(PathsDataset(common_root, tr_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='train')
         for tr_flist in train_list]
    test_inc_datasets = \
        [AvalancheDataset(PathsDataset(common_root, te_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='eval')
         for te_flist in test_list]

    return train_inc_datasets, test_inc_datasets
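
# A minimal usage sketch for datasets_from_paths (the helper above; not part
# of the original listing). All file paths and labels are hypothetical
# placeholders; the optional third tuple element is a
# (top, left, height, width) crop box.
from torchvision.transforms import ToTensor

example_train_list = [
    [('/data/imgs/exp0_a.png', 0), ('/data/imgs/exp0_b.png', 1)],
    [('/data/imgs/exp1_a.png', 2, (0, 0, 64, 64)),
     ('/data/imgs/exp1_b.png', 3)],
]
# A single nested list: the same test set is shared by all experiences.
example_test_list = [[('/data/imgs/test_a.png', 0),
                      ('/data/imgs/test_b.png', 2)]]

train_datasets, test_datasets = datasets_from_paths(
    example_train_list,
    example_test_list,
    complete_test_set_only=True,
    train_transform=ToTensor(),
    test_transform=ToTensor())
# test_datasets contains a single, complete test set here.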
Example No. 16
def datasets_from_filelists(root, train_filelists, test_filelists,
                            complete_test_set_only=False,
                            train_transform=None, train_target_transform=None,
                            test_transform=None, test_target_transform=None):
    """
    This reader parses a list of Caffe-style filelists and returns the proper
    Dataset objects.

    A Caffe-style filelist is a text file in which each line describes two
    elements: the path to the pattern (relative to the root parameter) and
    its class label, separated by a single space.

    This method reads each filelist and returns a separate dataset for each
    of them.

    Beware that the parameters must be **lists of paths to Caffe-style
    filelists**. If you need to create a dataset given a list of
    **pattern paths**, use `datasets_from_paths` instead.

    :param root: root path where the data to load are stored. May be None.
    :param train_filelists: list of paths to train filelists. The filelist
        format should be: impath label\\nimpath label\\n ... (the same as
        Caffe's filelist format).
    :param test_filelists: list of paths to test filelists. It can also be a
        single path when the test set is the same for each experience.
    :param complete_test_set_only: if True, test_filelists must contain
        the path to a single filelist that will serve as the complete test set.
        Alternatively, test_filelists can be the path (str) to the complete test
        set filelist. If False, train_filelists and test_filelists must contain
        the same amount of filelists paths. Defaults to False.
    :param train_transform: The transformation to apply to training patterns.
        Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :return: A tuple of two lists: the train datasets (one per train
        filelist) and the test datasets.
    """

    if complete_test_set_only:
        if isinstance(test_filelists, (str, Path)):
            # A single path: wrap it so that the code below can uniformly
            # iterate over a list of filelist paths.
            test_filelists = [test_filelists]
        elif len(test_filelists) > 1:
            raise ValueError(
                'When complete_test_set_only is True, test_filelists must '
                'be a str, Path or a list with a single element describing '
                'the path to the complete test set.')
    else:
        if len(test_filelists) != len(train_filelists):
            raise ValueError(
                'When complete_test_set_only is False, test_filelists and '
                'train_filelists must contain the same number of elements.')

    transform_groups = dict(train=(train_transform, train_target_transform),
                            eval=(test_transform, test_target_transform))
    train_inc_datasets = \
        [AvalancheDataset(FilelistDataset(root, tr_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='train')
         for tr_flist in train_filelists]
    test_inc_datasets = \
        [AvalancheDataset(FilelistDataset(root, te_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='eval')
         for te_flist in test_filelists]

    return train_inc_datasets, test_inc_datasets
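
# A minimal usage sketch for datasets_from_filelists (not part of the
# original listing). The .txt names are hypothetical Caffe-style filelists,
# each line being "relative/path/to/img.png <label>".
from torchvision.transforms import ToTensor

train_datasets, test_datasets = datasets_from_filelists(
    '/data/imgs',                          # root prepended to each image path
    ['train_exp0.txt', 'train_exp1.txt'],  # one filelist per experience
    'test_all.txt',                        # single, complete test set
    complete_test_set_only=True,
    train_transform=ToTensor(),
    test_transform=ToTensor())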
Example No. 17
    def test_avalanche_concat_dataset_collate_fn_inheritance(self):
        tensor_x = torch.rand(200, 3, 28, 28)
        tensor_y = torch.randint(0, 100, (200, ))
        tensor_z = torch.randint(0, 100, (200, ))

        tensor_x2 = torch.rand(200, 3, 28, 28)
        tensor_y2 = torch.randint(0, 100, (200, ))
        tensor_z2 = torch.randint(0, 100, (200, ))

        def my_collate_fn(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 1
            z_values = torch.tensor([-1 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        def my_collate_fn2(patterns):
            x_values = torch.stack([pat[0] for pat in patterns], 0)
            y_values = torch.tensor([pat[1] for pat in patterns]) + 2
            z_values = torch.tensor([-2 for _ in patterns])
            t_values = torch.tensor([pat[3] for pat in patterns])
            return x_values, y_values, z_values, t_values

        dataset1 = TensorDataset(tensor_x, tensor_y, tensor_z)
        dataset2 = AvalancheTensorDataset(tensor_x2,
                                          tensor_y2,
                                          tensor_z2,
                                          collate_fn=my_collate_fn)
        concat = AvalancheConcatDataset([dataset1, dataset2],
                                        collate_fn=my_collate_fn2)  # Ok

        x, y, z, t = dataset2[0:5]
        self.assertIsInstance(x, Tensor)
        self.assertTrue(torch.equal(tensor_x2[0:5], x))
        self.assertTrue(torch.equal(tensor_y2[0:5] + 1, y))
        self.assertTrue(torch.equal(torch.full((5, ), -1, dtype=torch.long),
                                    z))
        self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t))

        x2, y2, z2, t2 = concat[0:5]
        self.assertIsInstance(x2, Tensor)
        self.assertTrue(torch.equal(tensor_x[0:5], x2))
        self.assertTrue(torch.equal(tensor_y[0:5] + 2, y2))
        self.assertTrue(
            torch.equal(torch.full((5, ), -2, dtype=torch.long), z2))
        self.assertTrue(torch.equal(torch.zeros(5, dtype=torch.long), t2))

        dataset1_classification = AvalancheTensorDataset(
            tensor_x,
            tensor_y,
            tensor_z,
            dataset_type=AvalancheDatasetType.CLASSIFICATION)

        dataset2_segmentation = AvalancheDataset(
            dataset2, dataset_type=AvalancheDatasetType.SEGMENTATION)

        with self.assertRaises(ValueError):
            bad_concat_types = dataset1_classification + dataset2_segmentation

        with self.assertRaises(ValueError):
            bad_concat_collate = AvalancheConcatDataset(
                [dataset1, dataset2_segmentation], collate_fn=my_collate_fn)

        ok_concat_classification = dataset1_classification + dataset2
        self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                         ok_concat_classification.dataset_type)

        ok_concat_classification2 = dataset2 + dataset1_classification
        self.assertEqual(AvalancheDatasetType.CLASSIFICATION,
                         ok_concat_classification2.dataset_type)
Example No. 18
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create Task Dataset
training_datasets = []
test_datasets = []

assert len(args.sequence_order) == len(args.task_order)

for i in range(len(args.sequence_order)):
    training_datasets.append(
        AvalancheDataset(ClassificationSubSequence(
            path_to_root=args.train_path_to_root,
            labelmap_file=args.labelmap_file,
            patch_size=64,
            subsequence_index=args.sequence_order[i],
            is_load_to_ram=True,
            color_transform=args.color_transform,
            is_gdumb=args.cl_strategy == "GDumb"),
                         task_labels=args.task_order[i]))

    test_datasets.append(
        AvalancheDataset(ClassificationSubSequence(
            path_to_root=args.val_path_to_root,
            labelmap_file=args.labelmap_file,
            patch_size=64,
            subsequence_index=args.sequence_order[i],
            is_load_to_ram=True,
            color_transform=args.color_transform),
                         task_labels=args.task_order[i]))
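
# Not part of the original listing: the per-task datasets built above would
# typically be wrapped into a benchmark before training. A sketch using
# Avalanche's dataset_benchmark generator (assuming this generator is
# available in the installed Avalanche version):
from avalanche.benchmarks.generators import dataset_benchmark

scenario = dataset_benchmark(training_datasets, test_datasets)
for experience in scenario.train_stream:
    print(experience.current_experience, experience.task_label)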
def split_detection_benchmark(n_experiences: int,
                              train_dataset,
                              test_dataset,
                              n_classes: int,
                              train_transform=None,
                              eval_transform=None,
                              shuffle=True):
    """
    Creates an example object detection/segmentation benchmark.

    This is a generator for toy benchmarks that should be used only to
    show how a detection benchmark can be created. It is not meant to be
    used for research purposes!

    :param n_experiences: The number of train experiences to create.
    :param train_dataset: The training dataset.
    :param test_dataset: The test dataset.
    :param n_classes: The number of categories (excluding the background).
    :param train_transform: The train transformation.
    :param eval_transform: The eval transformation.
    :param shuffle: If True, the dataset will be split randomly.
    :return: A :class:`DetectionScenario` instance.
    """

    transform_groups = dict(
        train=(train_transform, None),
        eval=(eval_transform, None),
    )

    exp_n_imgs = len(train_dataset) // n_experiences
    remaining = len(train_dataset) % n_experiences

    train_dataset_avl = AvalancheDataset(train_dataset,
                                         transform_groups=transform_groups,
                                         initial_transform_group='train')
    test_dataset_avl = AvalancheDataset(test_dataset,
                                        transform_groups=transform_groups,
                                        initial_transform_group='eval')

    exp_sz = [exp_n_imgs for _ in range(n_experiences)]
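    # Distribute the remainder: the first `remaining` experiences get one
    # extra image each.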
    for exp_id in range(n_experiences):
        if remaining == 0:
            break

        exp_sz[exp_id] += 1
        remaining -= 1

    train_indices = list(range(len(train_dataset_avl)))
    if shuffle:
        train_indices = torch.as_tensor(train_indices)[torch.randperm(
            len(train_indices))].tolist()

    train_exps_datasets = []
    last_slice_idx = 0
    for exp_id in range(n_experiences):
        n_imgs = exp_sz[exp_id]
        idx_range = train_indices[last_slice_idx:last_slice_idx + n_imgs]
        train_exps_datasets.append(
            AvalancheSubset(train_dataset_avl, indices=idx_range))
        last_slice_idx += n_imgs

    train_def = StreamUserDef(
        exps_data=train_exps_datasets,
        exps_task_labels=[0 for _ in range(len(train_exps_datasets))],
        origin_dataset=train_dataset,
        is_lazy=False)

    test_def = StreamUserDef(exps_data=[test_dataset_avl],
                             exps_task_labels=[0],
                             origin_dataset=test_dataset,
                             is_lazy=False)

    return DetectionCLScenario(n_classes=n_classes,
                               stream_definitions={
                                   'train': train_def,
                                   'test': test_def
                               },
                               complete_test_set_only=True)
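
# A usage sketch for split_detection_benchmark (not part of the original
# listing). my_train_detection_set / my_test_detection_set are hypothetical
# detection datasets returning (image, target) pairs.
benchmark = split_detection_benchmark(
    n_experiences=5,
    train_dataset=my_train_detection_set,
    test_dataset=my_test_detection_set,
    n_classes=20,
    shuffle=True)

for experience in benchmark.train_stream:
    print(f'Experience {experience.current_experience}: '
          f'{len(experience.dataset)} images')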