Example #1
def split_train_val(dataset: TaskSet,
                    val_split: float = 0.1) -> Tuple[TaskSet, TaskSet]:
    """Split train dataset into two datasets, one for training and one for validation.

    :param dataset: A torch dataset, with .x and .y attributes.
    :param val_split: Percentage to allocate for validation, between [0, 1[.
    :return: A tuple a dataset, respectively for train and validation.
    """
    random_state = np.random.RandomState(seed=1)
    indexes = np.arange(len(dataset))
    random_state.shuffle(indexes)

    train_indexes = indexes[int(val_split * len(indexes)):]
    val_indexes = indexes[:int(val_split * len(indexes))]

    x_train, y_train, t_train = dataset.get_raw_samples(train_indexes)
    train_dataset = TaskSet(x_train,
                            y_train,
                            t_train,
                            trsf=dataset.trsf,
                            data_type=dataset.data_type)

    x_val, y_val, t_val = dataset.get_raw_samples(val_indexes)
    val_dataset = TaskSet(x_val,
                          y_val,
                          t_val,
                          trsf=dataset.trsf,
                          data_type=dataset.data_type)

    return train_dataset, val_dataset
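
A minimal usage sketch of split_train_val; the continuum.tasks import path is an assumption, not shown in the snippet above:

# Hedged usage sketch -- import path assumed.
import numpy as np
from continuum.tasks import TaskSet, split_train_val

x = np.random.rand(100, 2, 2, 3)  # 100 dummy samples
y = np.zeros((100,))              # dummy labels
t = np.zeros((100,))              # dummy task ids

taskset = TaskSet(x, y, t, None)
train_set, val_set = split_train_val(taskset, val_split=0.2)
assert len(val_set) == 20 and len(train_set) == 80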
Example #2
def test_get_random_samples(nb_samples):
    x = np.ones((10, 2, 2, 3))
    y = np.ones((10, ))
    t = np.ones((10, ))

    base_set = TaskSet(x, y, t, None)

    base_set.get_random_samples(nb_samples=nb_samples)
Example #3
def test_get_raw_samples(nb_samples):
    x = np.ones((10, 2, 2, 3))
    y = np.ones((10, ))
    t = np.ones((10, ))

    base_set = TaskSet(x, y, t, None)

    data, y_, t_ = base_set.get_raw_samples(indexes=range(nb_samples))

    assert (x[:nb_samples] == data).all()
    assert (y[:nb_samples] == y_).all()
    assert (t[:nb_samples] == t_).all()
Example #4
    def __getitem__(self, task_index: Union[int, slice]):
        """Returns a task by its unique index.

        :param task_index: The unique index of a task. As with a list, you can
                           use indexing in [0, len), negative indexing, or
                           even slices.
        :return: A PyTorch Dataset for that task.
        """
        if isinstance(task_index, slice):
            raise NotImplementedError(
                f"You cannot select multiple tasks ({task_index}) on an OnlineFellowship scenario yet"
            )

        self.cl_dataset = self.cl_datasets[task_index]

        if isinstance(self.cl_dataset, _ContinuumDataset):
            x, y, _ = self.cl_dataset.get_data()
            t = np.ones(len(y)) * task_index

            taskset = TaskSet(x,
                              y,
                              t,
                              trsf=self._get_trsf(task_index,
                                                  self.transformations),
                              target_trsf=self._get_label_trsf(task_index),
                              data_type=self.cl_dataset.data_type,
                              bounding_boxes=self.cl_dataset.bounding_boxes)
        else:
            if not isinstance(self.cl_dataset, BaseTaskSet):
                raise ValueError(
                    "self.cl_datasets can only contain _ContinuumDataset or TaskSet"
                )
            taskset = self.cl_dataset
        return taskset
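
For context, a hedged sketch of how this __getitem__ is typically driven; the dataset classes and import paths are assumptions, not taken from the snippet:

# Hedged usage sketch -- imports and constructors assumed.
from continuum.datasets import MNIST, FashionMNIST
from continuum.scenarios import OnlineFellowship

scenario = OnlineFellowship([
    MNIST("data", train=True, download=True),
    FashionMNIST("data", train=True, download=True),
])
for task_id in range(len(scenario)):
    taskset = scenario[task_id]  # integers only; a slice raises NotImplementedError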
Example #5
def concat(task_sets: List[TaskSet]) -> TaskSet:
    """Concatenate a dataset A with one or many *other* datasets.

    The transformations will be those of the first dataset.

    :param Tasksets: A list of task sets.
    :return: A concatenated task set.
    """
    x, y, t = [], [], []

    data_type = task_sets[0].data_type

    for task_set in task_sets:
        if task_set.data_type != data_type:
            raise ValueError(
                f"Invalid data type {task_set.data_type} != {data_type}")

        x.append(task_set._x)
        y.append(task_set._y)
        t.append(task_set._t)

    return TaskSet(np.concatenate(x),
                   np.concatenate(y),
                   np.concatenate(t),
                   trsf=task_sets[0].trsf,
                   data_type=data_type)
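
A hedged usage sketch of concat, assuming the TaskSet constructor shown earlier:

# Hedged usage sketch -- builds on the definitions above.
import numpy as np

a = TaskSet(np.random.rand(10, 2, 2, 3), np.ones((10,)), np.zeros((10,)), None)
b = TaskSet(np.random.rand(5, 2, 2, 3), np.zeros((5,)), np.ones((5,)), None)

merged = concat([a, b])
assert len(merged) == 15  # samples are stacked; transformations come from `a`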
Example #6
def test_concat_method(nb_others):
    x = np.random.rand(10, 2, 2, 3)
    y = np.ones((10, ))
    t = np.ones((10, ))

    base_set = TaskSet(x, y, t, None)
    initial_len = len(base_set)

    others = [
        TaskSet(np.copy(x), np.copy(y), np.copy(t), None)
        for _ in range(nb_others)
    ]
    base_set.concat(*others)
    assert len(base_set) == initial_len + nb_others * initial_len
    loader = DataLoader(base_set)
    for x, y, t in loader:
        pass
Example #7
def test_split_train_val(val_split, nb_val):
    x = np.random.rand(10, 2, 2, 3)
    y = np.ones((10, ))
    t = np.ones((10, ))

    base_set = TaskSet(x, y, t, None)

    train_set, val_set = split_train_val(base_set, val_split)
    assert len(val_set) == nb_val
    assert len(train_set) + len(val_set) == len(base_set)
Example #8
    def __getitem__(self, task_index: Union[int, slice]):
        """Returns a task by its unique index.

        :param task_index: The unique index of a task. As with a list, you can
                           use indexing in [0, len), negative indexing, or
                           even slices.
        :return: A PyTorch Dataset for that task.
        """
        x, y, t = self._select_data_by_task(task_index)
        return TaskSet(x, y, t, self.trsf, data_type=self.cl_dataset.data_type)
Example #9
def taskset_subset(taskset: TaskSet, indices: np.ndarray) -> TaskSet:
    x, y, t = taskset.get_raw_samples(indices)
    # TODO: Not sure if/how to handle the `bounding_boxes` attribute here.
    bounding_boxes = taskset.bounding_boxes
    if bounding_boxes is not None:
        bounding_boxes = bounding_boxes[indices]
    return replace_taskset_attributes(
        taskset, x=x, y=y, t=t, bounding_boxes=bounding_boxes
    )
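
A hedged usage sketch of taskset_subset; it relies on the replace_taskset_attributes helper referenced above, which is defined elsewhere:

# Hedged usage sketch -- assumes TaskSet and replace_taskset_attributes are importable.
import numpy as np

taskset = TaskSet(np.random.rand(10, 2, 2, 3), np.arange(10), np.zeros((10,)), None)
subset = taskset_subset(taskset, np.array([0, 2, 4]))
assert len(subset) == 3  # only the selected indices remain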
Example #10
def test_split_train_val_loading():
    x = np.random.rand(10, 2, 2, 3)
    y = np.ones((10, ))
    t = np.ones((10, ))

    base_set = TaskSet(x, y, t, None)

    train_set, val_set = split_train_val(base_set, 0.2)

    for task_set in (train_set, val_set):
        loader = DataLoader(task_set, batch_size=32)
        for x, y, t in loader:
            pass
Example #11
    def __getitem__(self, task_index):
        """Returns a task by its unique index.

        :param task_index: The unique index of a task, between 0 and len(loader) - 1.
                           It can also be a list, a numpy array, or a slice.
        :return: A PyTorch Dataset for the selected task(s).
        """
        x, y, _ = self.dataset

        if isinstance(task_index, slice):
            # Convert the slice to a list, following Python's indexing conventions.
            start = task_index.start if task_index.start is not None else 0
            stop = task_index.stop if task_index.stop is not None else len(self)
            step = task_index.step if task_index.step is not None else 1
            task_index = list(range(start, stop, step))
            if len(task_index) == 0:
                raise ValueError(f"Invalid slicing resulting in no data (start={start}, end={stop}, step={step}).")
        elif isinstance(task_index, np.ndarray):
            task_index = list(task_index)
        elif isinstance(task_index, int):
            task_index = [task_index]
        else:
            raise TypeError(f"Invalid type of task index {type(task_index).__name__}.")

        task_index = set([_handle_negative_indexes(ti, len(self)) for ti in task_index])

        t = np.concatenate([
            (np.ones(len(x)) * ti).astype(np.int32) for ti in task_index
        ])
        x = np.concatenate([
            x for _ in range(len(task_index))
        ])
        if self.shared_label_space:
            y = np.concatenate([
                y for _ in range(len(task_index))
            ])
        else:
            # Different transformations have different labels even though
            # the original images were the same
            y = np.concatenate([
                y + ti * self.num_classes_per_task for ti in task_index
            ])

        trsf = [  # Unused tasks get a None trsf
            self.get_task_transformation(ti)
            if ti in task_index else None
            for ti in range(len(self))
        ]

        return TaskSet(x, y, t, trsf, data_type=self.cl_dataset.data_type)
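
Because slices, lists, and arrays are all normalized to a set of task ids, several transformed tasks can be merged into a single TaskSet. A hedged sketch; the Permutations scenario name and constructor are assumptions:

# Hedged usage sketch -- scenario class and constructor assumed.
from continuum.datasets import MNIST
from continuum.scenarios import Permutations

scenario = Permutations(MNIST("data", train=True, download=True), nb_tasks=5)
first_three = scenario[0:3]  # one TaskSet covering tasks 0, 1 and 2
last_task = scenario[-1]     # negative index resolved by _handle_negative_indexes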
Example #12
def test_target_trsf(nb_classes):
    x = np.random.rand(10, 2, 2, 3)
    y = np.arange(10)
    t = np.ones((10, ))

    target_trsf = transforms.Lambda(lambda x: x % nb_classes)
    taskset = TaskSet(x, y, t, None, target_trsf=target_trsf)

    assert taskset.nb_classes == nb_classes, \
        "target transform not applied in get_classes"

    loader = DataLoader(taskset)
    for x, y, t in loader:
        pass
Example #13
def test_concat_function(nb_others):
    x = np.random.rand(10, 2, 2, 3)
    y = np.ones((10, ))
    t = np.ones((10, ))

    task_sets = [
        TaskSet(np.copy(x), np.copy(y), np.copy(t), None)
        for _ in range(nb_others)
    ]

    concatenation = concat(task_sets)
    assert len(concatenation) == nb_others * 10
    loader = DataLoader(concatenation)
    for x, y, t in loader:
        pass
Example #14
def test_sampler_function(log):
    np.random.seed(1)
    torch.manual_seed(1)

    x = np.random.rand(100, 2, 2, 3)
    y = np.ones((100, ), dtype=np.int64)
    y[0] = 0
    t = np.ones((100, ))

    taskset = TaskSet(x, y, t, None)
    sampler = get_balanced_sampler(taskset, log=log)

    loader = DataLoader(taskset, sampler=sampler, batch_size=1)
    nb_0 = 0
    for x, y, t in loader:
        if 0 in y:
            nb_0 += 1
    assert nb_0 > 1
Example #15
    def __getitem__(self, task_index: Union[int, slice]) -> TaskSet:
        """Returns a task by its unique index.

        :param task_index: The unique index of a task. As with a list, you can
                           use indexing in [0, len), negative indexing, or
                           even slices.
        :return: A PyTorch Dataset for that task.
        """
        if isinstance(task_index, slice) and task_index.step is not None:
            raise ValueError("Step in slice for segmentation is not supported.")

        x, y, t, task_index = self._select_data_by_task(task_index)
        t = self._get_task_ids(t, task_index)

        return TaskSet(
            x, y, t,
            self.trsf,
            target_trsf=self._get_label_transformation(task_index),
            data_type=self.cl_dataset.data_type
        )
Example #16
    def __getitem__(self, task_index):
        """Returns a task by its unique index.

        :param task_index: The unique index of a task, between 0 and len(loader) - 1.
        :return: A PyTorch Dataset for that task.
        """
        if isinstance(task_index, slice):
            raise ValueError(
                "Incremental training based on transformations "
                "does not support slices; please provide a single integer.")
        elif task_index < 0:  # Support for negative index, e.g. -1 == last
            while task_index < 0:
                task_index += len(self)

        self.update_task_indexes(task_index)
        if not self.shared_label_space:
            self.update_labels(task_index)
        train = self._select_data_by_task(task_index)
        trsf = self.get_task_transformation(task_index)

        return TaskSet(*train, trsf, data_type=self.cl_dataset.data_type)
Example #17
    def __getitem__(self, task_index: Union[int, slice]):
        """Returns a task by its unique index.

        :param task_index: The unique index of a task. As with a list, you can
                           use indexing in [0, len), negative indexing, or
                           even slices.
        :return: A PyTorch Dataset for that task.
        """
        if isinstance(task_index, slice) and isinstance(self.trsf, list):
            raise ValueError(
                f"You cannot select multiple tasks ({task_index}) when you have a "
                "different set of transformations per task")

        x, y, t, _, data_indexes = self._select_data_by_task(task_index)

        return TaskSet(x,
                       y,
                       t,
                       trsf=self.trsf[task_index] if isinstance(
                           self.trsf, list) else self.trsf,
                       data_type=self.cl_dataset.data_type,
                       bounding_boxes=self.cl_dataset.bounding_boxes,
                       data_indexes=data_indexes)
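
A hedged sketch of the per-task transformation list that the guard above protects; the transform values are illustrative assumptions only:

# Hedged sketch -- a list of per-task transformations.
from torchvision import transforms

per_task_trsf = [
    [transforms.RandomHorizontalFlip()],  # transformations for task 0
    [transforms.RandomRotation(45)],      # transformations for task 1
]
# With trsf given as a list, scenario[0] picks trsf[0], while a slice such as
# scenario[0:2] raises ValueError: one merged TaskSet cannot carry two
# different transformation sets.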