def random_validation_split_strategy(
        validation_size: Union[int, float],
        shuffle: bool,
        experience: Experience):
    """
    The default splitting strategy used by
    :func:`benchmark_with_validation_stream`.

    This splitting strategy simply splits the experience in two experiences (
    train and validation), the latter of size `validation_size`.

    When taking inspiration for your custom splitting strategy, please consider
    that all parameters preceding `experience` are filled by
    :func:`benchmark_with_validation_stream` by using `partial` from the
    `functools` standard library. A custom splitting strategy must have only
    a single parameter: the experience. Consider wrapping your custom
    splitting strategy with `partial` if more parameters are needed.

    Also consider that the stream name of the experience can be obtained by
    using `experience.origin_stream.name`.

    :param validation_size: The number of instances to allocate to the
        validation experience. Any value in [0, 1] (including the integers
        0 and 1) is interpreted as a fraction of the experience; any larger
        value is truncated to an int and used as an absolute number of
        instances.
    :param shuffle: If True, instances will be shuffled before splitting.
        Otherwise, the first instances will be allocated to the training
        dataset by leaving the last ones to the validation dataset.
    :param experience: The experience to split.
    :return: A tuple containing 2 elements: the new training and validation
        datasets.
    :raises ValueError: If `validation_size` is negative or if it requires
        more instances than the experience contains.
    """

    exp_dataset = experience.dataset
    n_instances = len(exp_dataset)

    # A negative size would silently produce an empty validation set through
    # the slicing below; reject it explicitly instead.
    if validation_size < 0:
        raise ValueError(
            f'validation_size must be non-negative, got {validation_size}')

    if validation_size <= 1.0:
        # Fractional size: round down to a whole number of instances.
        valid_n_instances = int(validation_size * n_instances)
    else:
        valid_n_instances = int(validation_size)
        if valid_n_instances > n_instances:
            raise ValueError(
                f'Can\'t create the validation experience: not enough '
                f'instances. Required {valid_n_instances}, got only '
                f'{n_instances}')

    if shuffle:
        # A random permutation of 0..n-1 is exactly the shuffled index list.
        exp_indices = torch.randperm(n_instances).tolist()
    else:
        exp_indices = list(range(n_instances))

    train_n_instances = n_instances - valid_n_instances

    # Training takes the head of the index list, validation takes the tail.
    result_train_dataset = AvalancheSubset(
        exp_dataset, indices=exp_indices[:train_n_instances])
    result_valid_dataset = AvalancheSubset(
        exp_dataset, indices=exp_indices[train_n_instances:])

    return result_train_dataset, result_valid_dataset
Пример #2
0
    def create_sub_experience_list(experience):
        """Split an experience into single-sample sub-experiences.

        The dataset indices are randomly permuted and one sub-experience is
        built per index. Each sub-experience is a shallow copy of
        `experience` whose dataset is replaced by a subset holding just that
        one sample.

        :param experience: single Experience.

        :return: list of Experience, one for each sample of the original
            dataset, in shuffled order.
        """
        samples_per_sub_exp = 1
        shuffled = torch.randperm(len(experience.dataset))

        def _build(position):
            # Shallow copy: only the dataset attribute is swapped out.
            clone = copy.copy(experience)
            first = position * samples_per_sub_exp
            last = (position + 1) * samples_per_sub_exp
            clone.dataset = AvalancheSubset(clone.dataset,
                                            indices=shuffled[first:last])
            return clone

        return [_build(position) for position in range(len(shuffled))]
Пример #3
0
def class_balanced_split_strategy(validation_size: Union[int, float],
                                  experience: Experience):
    """Class-balanced train/validation splits.

    This splitting strategy splits `experience` into two experiences
    (train and validation) using a class-balanced split: for each class in
    the experience, a `validation_size` fraction of its samples (rounded
    down) goes to the validation set. Samples of each class are chosen
    randomly.

    :param validation_size: The percentage of samples to allocate to the
        validation experience as a float between 0 and 1. Only `float`
        values are accepted.
    :param experience: The experience to split.
    :return: A tuple containing 2 elements: the new training and validation
        datasets.
    :raises ValueError: If `validation_size` is not a float in [0, 1], or
        if the experience is empty while a non-zero fraction is requested.
    """
    if not isinstance(validation_size, float):
        raise ValueError("validation_size must be a float")
    if not 0.0 <= validation_size <= 1.0:
        raise ValueError("validation_size must be a float in [0, 1].")

    exp_dataset = experience.dataset
    # validation_size is at most 1.0 here, so this only triggers for an
    # empty dataset combined with a non-zero fraction.
    if validation_size > len(exp_dataset):
        raise ValueError(f"Can't create the validation experience: not enough "
                         f"instances. Required {validation_size}, got only "
                         f"{len(exp_dataset)}")

    exp_classes = experience.classes_in_this_experience

    # Shuffled dataset indices, and the targets reordered the same way so
    # that position i of both tensors refers to the same sample.
    exp_indices = torch.randperm(len(exp_dataset))
    exp_targets = torch.as_tensor(exp_dataset.targets)[exp_indices]

    train_exp_indices = []
    valid_exp_indices = []
    for cid in exp_classes:  # split indices for each class separately.
        # Convert to plain Python ints so the resulting index lists do not
        # hold 0-d tensors.
        c_indices = exp_indices[exp_targets == cid].tolist()
        valid_n_instances = int(validation_size * len(c_indices))
        valid_exp_indices.extend(c_indices[:valid_n_instances])
        train_exp_indices.extend(c_indices[valid_n_instances:])

    result_train_dataset = AvalancheSubset(exp_dataset,
                                           indices=train_exp_indices)
    result_valid_dataset = AvalancheSubset(exp_dataset,
                                           indices=valid_exp_indices)
    return result_train_dataset, result_valid_dataset
Пример #4
0
def fixed_size_experience_split_strategy(
    experience_size: int,
    shuffle: bool,
    drop_last: bool,
    experience: ClassificationExperience,
):
    """
    The default splitting strategy used by :func:`data_incremental_benchmark`.

    This splitting strategy simply splits the experience in smaller experiences
    of size `experience_size`.

    When taking inspiration for your custom splitting strategy, please consider
    that all parameters preceding `experience` are filled by
    :func:`data_incremental_benchmark` by using `partial` from the `functools`
    standard library. A custom splitting strategy must have only a single
    parameter: the experience. Consider wrapping your custom splitting strategy
    with `partial` if more parameters are needed.

    Also consider that the stream name of the experience can be obtained by
    using `experience.origin_stream.name`.

    :param experience_size: The experience size (number of instances). Must
        be a positive integer.
    :param shuffle: If True, instances will be shuffled before splitting.
    :param drop_last: If True, the last mini-experience will be dropped if
        not of size `experience_size`
    :param experience: The experience to split.
    :return: The list of datasets that will be used to create the
        mini-experiences.
    :raises ValueError: If `experience_size` is not positive.
    """

    # A zero or negative size can never advance through the dataset, so it
    # must be rejected up front.
    if experience_size <= 0:
        raise ValueError(
            f"experience_size must be a positive integer, "
            f"got {experience_size}")

    exp_dataset = experience.dataset
    n_instances = len(exp_dataset)

    if shuffle:
        # A random permutation of 0..n-1 is exactly the shuffled index list.
        exp_indices = torch.randperm(n_instances).tolist()
    else:
        exp_indices = list(range(n_instances))

    result_datasets = []
    for init_idx in range(0, n_instances, experience_size):
        final_idx = init_idx + experience_size  # Exclusive
        if final_idx > n_instances:
            # Only the trailing chunk can be shorter than experience_size.
            if drop_last:
                break

            final_idx = n_instances

        result_datasets.append(
            AvalancheSubset(exp_dataset,
                            indices=exp_indices[init_idx:final_idx]))

    return result_datasets