def setup(self, stage: str = None):
    """Create the per-task train/val/test datasets for the requested stage.

    Follows the LightningDataModule-style `setup` contract: build the
    class-incremental loaders lazily (only if not already built) and split
    each training task into train/validation subsets.

    Args:
        stage: One of "fit", "test", or None (meaning both).

    Raises:
        RuntimeError: If `stage` is not one of the accepted values.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    super().setup(stage=stage)

    if stage not in (None, "fit", "test"):
        # FIX: was an f-string with no placeholders; a plain string is equivalent.
        raise RuntimeError("`stage` should be 'fit', 'test' or None.")

    if stage in (None, "fit"):
        self.train_cl_dataset = self.train_cl_dataset or self.make_dataset(
            self.config.data_dir, download=False, train=True
        )
        self.train_cl_loader = self.train_cl_loader or ClassIncremental(
            cl_dataset=self.train_cl_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.increment,
            initial_increment=self.initial_increment,
            transformations=self.train_transforms,
            class_order=self.class_order,
        )

        if not self.train_datasets and not self.val_datasets:
            for task_id, train_taskset in enumerate(self.train_cl_loader):
                # Hold out 10% of each task's training data for validation.
                train_taskset, valid_taskset = split_train_val(
                    train_taskset, val_split=0.1
                )
                self.train_datasets.append(train_taskset)
                self.val_datasets.append(valid_taskset)

            # IDEA: We could do the remapping here instead of adding a wrapper later.
            if self.shared_action_space and isinstance(
                self.action_space, spaces.Discrete
            ):
                # If we have a shared output space, then they are all mapped to [0, n_per_task]
                self.train_datasets = list(map(relabel, self.train_datasets))
                self.val_datasets = list(map(relabel, self.val_datasets))

    if stage in (None, "test"):
        self.test_cl_dataset = self.test_cl_dataset or self.make_dataset(
            self.config.data_dir, download=False, train=False
        )
        self.test_cl_loader = self.test_cl_loader or ClassIncremental(
            cl_dataset=self.test_cl_dataset,
            nb_tasks=self.nb_tasks,
            increment=self.test_increment,
            initial_increment=self.test_initial_increment,
            transformations=self.test_transforms,
            class_order=self.test_class_order,
        )

        if not self.test_datasets:
            # TODO: If we decide to 'shuffle' the test tasks, then store the sequence of
            # task ids in a new property, probably here.
            # self.test_task_order = list(range(len(self.test_datasets)))
            self.test_datasets = list(self.test_cl_loader)

            # IDEA: We could do the remapping here instead of adding a wrapper later.
            if self.shared_action_space and isinstance(
                self.action_space, spaces.Discrete
            ):
                # If we have a shared output space, then they are all mapped to [0, n_per_task]
                self.test_datasets = list(map(relabel, self.test_datasets))
def load_dataset(dataset):
    """Build a 10-task class-incremental scenario for the named dataset.

    Args:
        dataset: Either "MNIST" or "FashionMNIST".

    Returns:
        A `ClassIncremental` scenario over the chosen training set.

    Raises:
        ValueError: If `dataset` is not a supported name.
    """
    if dataset == "MNIST":
        data = MNIST("MNIST", train=True, download=True)
    elif dataset == "FashionMNIST":
        data = FashionMNIST("FashionMNIST", train=True, download=True)
    else:
        # BUG FIX: an unknown name previously fell through and raised
        # UnboundLocalError on the return below; fail fast with a clear message.
        raise ValueError(f"Unknown dataset: {dataset!r}")
    # `transformations` is a module-level global — TODO confirm it is defined.
    return ClassIncremental(data, nb_tasks=10, transformations=transformations)
def test_inMemory_keepLabels_Fellowship(increment, dataset7c, dataset10c, dataset20c):
    """Without label remapping, overlapping class ids collapse to 20 classes."""
    fellowship = Fellowship([dataset7c, dataset10c, dataset20c], update_labels=False)
    _, labels, task_ids = fellowship.get_data()
    assert np.unique(task_ids).size == 3
    assert np.unique(labels).size == 20

    if isinstance(increment, list):
        # A list increment is incompatible with this fellowship setup.
        with pytest.raises(Exception):
            ClassIncremental(fellowship, increment=increment)
    else:
        scenario = ClassIncremental(fellowship, increment=increment)
        assert scenario.nb_classes == 20
        assert scenario.nb_tasks == 20
def test_inMemory_updateLabels_Fellowship(increment, dataset7c, dataset10c, dataset20c):
    """With label remapping, the fellowship exposes 7 + 10 + 20 = 37 classes."""
    fellowship = Fellowship([dataset7c, dataset10c, dataset20c], update_labels=True)
    _, labels, task_ids = fellowship.get_data()
    assert np.unique(task_ids).size == 3
    assert np.unique(labels).size == 37

    # Both branches built the exact same scenario; build it once instead.
    continuum = ClassIncremental(fellowship, increment=increment)
    assert continuum.nb_classes == 37
    expected_tasks = len(increment) if isinstance(increment, list) else 37
    assert continuum.nb_tasks == expected_tasks
def test_encode_scenario_inference_fct():
    """Encoding with a custom inference function keeps the task structure."""
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    dummy = InMemoryDataset(*gen_data())
    scenario = ClassIncremental(dummy, increment=1)
    model = nn.Linear(32 * 32 * 3, 50)

    def inference_fct(model, x):
        # Flatten the images before the linear layer.
        return model(x.view(-1, 32 * 32 * 3))

    encoded_scenario = encode_scenario(
        model=model,
        scenario=scenario,
        batch_size=64,
        filename=filename_h5,
        inference_fct=inference_fct,
    )

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])
    # The encoder's output dimension becomes the feature size.
    assert encoded_scenario[0][0][0].shape[0] == 50

    os.remove(filename_h5)
def test_train_test_CUB200():
    """Train and test splits of CUB200 differ in size and are both loadable."""
    scenario_tr = ClassIncremental(CUB200(DATA_PATH, train=True), nb_tasks=1)
    scenario_te = ClassIncremental(CUB200(DATA_PATH, train=False), nb_tasks=1)
    assert len(scenario_tr[0]) != len(scenario_te[0])

    for scenario in (scenario_tr, scenario_te):
        for taskset in scenario:
            # Just check that one batch can actually be loaded.
            _, _, _ = next(iter(DataLoader(taskset)))
def test_encode_scenario_MNIST():
    """Encoding an MNIST scenario keeps task lengths and sets feature dim to 50."""
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    dataset = MNIST(data_path=DATA_PATH, download=False, train=True)
    scenario = ClassIncremental(dataset, increment=2)
    model = nn.Linear(28 * 28, 50)

    def inference_fct(model, x):
        # Flatten 28x28 images before the linear layer.
        return model(x.view(-1, 28 * 28))

    encoded_scenario = encode_scenario(
        model=model,
        scenario=scenario,
        batch_size=264,
        filename=filename_h5,
        inference_fct=inference_fct,
    )

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    for encoded_taskset, taskset in zip(encoded_scenario, scenario):
        assert len(encoded_taskset) == len(taskset)
    assert encoded_scenario[0][0][0].shape[0] == 50

    os.remove(filename_h5)
def test_scenario_remapping():
    """Remapping a reshuffled sub-scenario yields contiguous class ids."""
    task_order = np.arange(9, -1, -1)  # visit the tasks in reverse order
    x_train, y_train, t_train = gen_data()
    dummy = InMemoryDataset(x_train, y_train, t_train, data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, task_order)
    mapping = get_scenario_remapping(subscenario)

    raw_parts = []
    remapped_parts = []
    for taskset in subscenario:
        raw_parts.append(taskset.get_classes())
        remapped_parts.append(remap_class_vector(taskset.get_classes(), mapping)[0])
    np_classes = np.concatenate(raw_parts)
    np_classes_remapped = np.concatenate(remapped_parts).astype(int)

    nb_classes_seen = len(np_classes)
    # Remapped ids are exactly 0..N-1; the raw (reversed) ids are not.
    assert np.equal(np_classes_remapped, np.arange(nb_classes_seen)).all()
    assert not np.equal(np_classes, np.arange(nb_classes_seen)).all()
def test_slicing_list(list_tasks):
    """Slicing a scenario with a list of task indices selects that many classes."""
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    taskset = scenario[list_tasks]
    targets = np.sort(np.unique(taskset._y))
    # BUG FIX: the failure message was `print(...)`, which returns None, so the
    # assert carried no message. Pass the f-string directly instead.
    assert len(targets) == len(list_tasks), f"{len(targets)} - vs - {len(list_tasks)}"
def sample(self, seed: int = None, nb_tasks: int = None) -> _BaseScenario:
    """Sample a new scenario with a randomly drawn class order.

    Args:
        seed: Seed for the class-order draw; a random one is picked if None.
        nb_tasks: Must equal the base scenario's task count (kept for API
            compatibility); defaults to it when None.

    Returns:
        A new `ClassIncremental` scenario over the same dataset, using the
        sampled class order.

    Raises:
        AssertionError: If `nb_tasks` differs from the base scenario's.
    """
    if nb_tasks is None:
        nb_tasks = self.base_scenario.nb_tasks
    if nb_tasks != self.base_scenario.nb_tasks:
        # BUG FIX: the exception object was created but never raised, so an
        # invalid `nb_tasks` was silently ignored.
        raise AssertionError(
            "You can not change the number of tasks in the generator")

    # Seed the generator.
    if seed is None:
        seed = np.random.randint(10000)

    # Generate a random class order.
    class_order = self.get_class_order(seed)
    new_list_class = self.list_classes[class_order]

    # Re-create a scenario mirroring base_scenario, with the new class order.
    scenario = ClassIncremental(
        cl_dataset=self.base_scenario.cl_dataset,
        nb_tasks=self.base_scenario.nb_tasks,
        increment=self.base_scenario.increment,
        initial_increment=self.base_scenario.initial_increment,
        transformations=self.base_scenario.transformations,
        class_order=new_list_class,
    )
    return scenario
def test_increments(increment, initial_increment, nb_tasks):
    """ImageFolder scenario: task count and class ranges follow the increments."""
    with tempfile.TemporaryDirectory() as train_path, tempfile.TemporaryDirectory() as test_path:
        gen_imagefolder(train_path)
        gen_imagefolder(test_path)

        clloader = ClassIncremental(
            ImageFolderDataset(train_path, test_path),
            increment=increment,
            initial_increment=initial_increment,
        )
        assert clloader.nb_tasks == nb_tasks

        seen_tasks = 0
        for task_id, train_dataset in enumerate(clloader):
            seen_tasks += 1
            if isinstance(increment, list):
                min_class = sum(increment[:task_id])
                max_class = sum(increment[:task_id + 1])
            elif initial_increment:
                max_class = initial_increment + increment * task_id
                min_class = (
                    initial_increment + increment * (task_id - 1)
                    if task_id > 0
                    else 0
                )
            else:
                min_class = increment * task_id
                max_class = increment * (task_id + 1)

            # Make sure the whole task is iterable through a DataLoader.
            for _ in DataLoader(train_dataset):
                pass

            assert np.max(train_dataset.y) == max_class - 1
            assert np.min(train_dataset.y) == min_class

        assert seen_tasks == nb_tasks
def test_encode_scenario():
    """Encoding a scenario preserves the number of tasks and task lengths."""
    filename_h5 = "test_encode_scenario.hdf5"
    if os.path.exists(filename_h5):
        os.remove(filename_h5)

    if torch.cuda.is_available():
        device = torch.device('cuda:0')

        def inference_fct(model, x):
            # Move both model and batch to the GPU before inference.
            return model.to(device)(x.to(device))
    else:
        def inference_fct(model, x):
            return model(x)

    x, y, t = gen_data()
    x = x.reshape(-1, 32 * 32 * 3)
    dummy = InMemoryDataset(x, y, t)
    scenario = ClassIncremental(dummy, increment=1)
    model = nn.Sequential(nn.Flatten(), nn.Linear(32 * 32 * 3, 50))

    encoded_scenario = encode_scenario(
        model=model,
        scenario=scenario,
        batch_size=64,
        filename=filename_h5,
        inference_fct=inference_fct,
    )

    assert scenario.nb_tasks == encoded_scenario.nb_tasks
    assert len(scenario[0]) == len(encoded_scenario[0])

    os.remove(filename_h5)
def test_increments(increment, initial_increment, nb_tasks):
    """In-memory scenario: task count and class ranges follow the increments."""
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(
        dummy, increment=increment, initial_increment=initial_increment
    )
    assert scenario.nb_tasks == nb_tasks

    seen_tasks = 0
    for task_id, taskset in enumerate(scenario):
        seen_tasks += 1
        if isinstance(increment, list):
            min_class = sum(increment[:task_id])
            max_class = sum(increment[:task_id + 1])
        elif initial_increment:
            max_class = initial_increment + increment * task_id
            min_class = (
                initial_increment + increment * (task_id - 1) if task_id > 0 else 0
            )
        else:
            min_class = increment * task_id
            max_class = increment * (task_id + 1)

        # Make sure the task is iterable through a DataLoader.
        for _ in DataLoader(taskset):
            pass

        assert np.max(taskset._y) == max_class - 1
        assert np.min(taskset._y) == min_class

    assert seen_tasks == nb_tasks
def test_visualization_ClassIncremental(dataset, name, shape, init_increment, increment):
    """Plot samples of every task of a ClassIncremental scenario to disk."""
    # AwA2 and Core50 both need resizing to a fixed 224x224 before ToTensor
    # (the two original branches were byte-identical, so they are merged).
    trsf = None
    if name in ("AwA2", "Core50"):
        trsf = [
            transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()]
            )
        ]

    scenario = ClassIncremental(
        cl_dataset=dataset(data_path=DATA_PATH, download=True, train=True),
        increment=increment,
        initial_increment=init_increment,
        transformations=trsf,
    )

    folder = "tests/samples/class_incremental/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    for task_id, taskset in enumerate(scenario):
        taskset.plot(
            path=folder,
            title="{}_ClassIncremental_{}.jpg".format(name, task_id),
            nb_samples=100,
            shape=shape,
        )
        # Also verify one batch can be loaded.
        _, _, _ = next(iter(DataLoader(taskset)))
def test_slicing_list_path_array(list_tasks):
    """A sub-scenario built from a list of task indices has that many tasks."""
    x_train, y_train = gen_string()
    dummy = InMemoryDataset(x_train, y_train, data_type=TaskType.IMAGE_PATH)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    # BUG FIX: the failure message was `print(...)` (returns None); pass the
    # f-string directly so failures are informative.
    assert subscenario.nb_tasks == len(list_tasks), (
        f"{len(subscenario)} - vs - {len(list_tasks)}"
    )
def test_slicing_list(list_tasks):
    """Sub-scenario creation from a list of task indices keeps the task count."""
    train = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=1)
    subscenario = create_subscenario(scenario, list_tasks)
    # BUG FIX: the failure message was `print(...)` (returns None); pass the
    # f-string directly so failures are informative.
    assert subscenario.nb_tasks == len(list_tasks), (
        f"{len(subscenario)} - vs - {len(list_tasks)}"
    )
def test_slicing_nc(index, classes):
    """Indexing the scenario returns a taskset with exactly the expected classes."""
    train, test = gen_data()
    dummy = InMemoryDataset(*train, *test)
    clloader = ClassIncremental(dummy, increment=2)
    dataset = clloader[index]
    targets = np.sort(np.unique(dataset.y))
    expected = np.array(classes)
    assert targets.size == expected.size
    assert (targets == expected).all(), (targets, classes)
def test_Fellowship_Dimension_Fail(tmpdir, list_datasets):
    """Mixing datasets with different image shapes must fail at scenario creation."""
    cl_dataset = Fellowship(
        datasets=[
            d(data_path=tmpdir, download=True, train=True) for d in list_datasets
        ]
    )

    # CIFAR10 and MNIST images do not share a shape, so this must raise.
    with pytest.raises(ValueError):
        ClassIncremental(cl_dataset, increment=10)
def test_slicing_nc_no_end(start_index, classes):
    """An open-ended slice keeps every class from `start_index` onwards."""
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=2)
    taskset = scenario[start_index:]
    targets = np.sort(np.unique(taskset._y))
    expected = np.array(classes)
    assert targets.size == expected.size
    assert (targets == expected).all(), (targets, classes)
def test_task_order_generator_nb_tasks(nb_tasks):
    """Sampling from a TaskOrderGenerator honours the requested task count."""
    train, test = gen_data()
    scenario = ClassIncremental(InMemoryDataset(*train), increment=1)
    generator = TaskOrderGenerator(scenario)
    sampled = generator.sample(nb_tasks=nb_tasks)
    assert sampled.nb_tasks == nb_tasks
def test_split_train_val(val_split):
    """split_train_val keeps the requested fraction and loses no samples."""
    train, test = gen_data()
    scenario = ClassIncremental(InMemoryDataset(*train), increment=5)
    for taskset in scenario:
        train_taskset, val_taskset = split_train_val(taskset, val_split=val_split)
        assert len(val_taskset) == int(val_split * len(taskset))
        assert len(train_taskset) + len(val_taskset) == len(taskset)
def test_nb_classes(fake_data):
    """The scenario reports the dataset's sample and class counts."""
    scenario = ClassIncremental(cl_dataset=fake_data, increment=2)
    assert scenario.nb_samples == 20
    assert scenario.nb_classes == NB_CLASSES
    assert (scenario.classes == np.arange(NB_CLASSES)).all()
def test_split_train_val(val_split):
    """Each task split into train/val keeps the fraction and the total size."""
    train, test = gen_data()
    clloader = ClassIncremental(InMemoryDataset(*train, *test), increment=5)
    for dataset in clloader:
        train_dataset, val_dataset = split_train_val(dataset, val_split=val_split)
        assert len(val_dataset) == int(val_split * len(dataset))
        assert len(train_dataset) + len(val_dataset) == len(dataset)
def test_class_order_generator(seed):
    """The sampled scenario's class order matches the generator's for a seed."""
    train, test = gen_data()
    scenario = ClassIncremental(InMemoryDataset(*train), increment=1)
    generator = ClassOrderGenerator(scenario)
    sampled = generator.sample(seed)
    class_order = generator.get_class_order(seed)
    assert (np.array(class_order) == np.array(sampled.class_order)).all()
def test_tensor_type(increment, nb_tasks):
    """Tensor-backed datasets produce the expected task count and load cleanly."""
    train, test = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type="tensor")
    scenario = ClassIncremental(dummy, increment=increment)

    first_task = scenario[0]
    for x, y, t in DataLoader(first_task):
        pass  # just make sure the first task iterates as (x, y, t) batches

    assert scenario.nb_tasks == nb_tasks
def test_tensor_type_get_samples(increment, nb_tasks):
    """get_random_samples works on a tensor-backed taskset."""
    train, test = gen_tensor_data()
    dummy = InMemoryDataset(*train, data_type="tensor")
    scenario = ClassIncremental(dummy, increment=increment)

    taskset = scenario[0]
    for x, y, t in DataLoader(taskset):
        pass  # first task must iterate as (x, y, t) batches

    x, y, t = taskset.get_random_samples(5)
def test_class_order_generator():
    """An unseeded sample preserves task count, class count and class set."""
    train, test = gen_data()
    scenario = ClassIncremental(InMemoryDataset(*train), increment=1)
    generator = ClassOrderGenerator(scenario)
    sampled = generator.sample()
    assert sampled.nb_tasks == scenario.nb_tasks
    assert sampled.nb_classes == scenario.nb_classes
    assert (sampled.classes == scenario.classes).all()
def test_scenario_CUB200_ClassIncremental():
    """Visual check: plot 100 samples of each 100-class CUB200 task."""
    dataset = CUB200(DATA_PATH, train=True, transform=None)
    scenario = ClassIncremental(
        dataset,
        increment=100,
        transformations=[Resize((224, 224)), ToTensor()],
    )
    print(f"Nb classes : {scenario.nb_classes} ")
    print(f"Nb tasks : {scenario.nb_tasks} ")
    for task_id, task_set in enumerate(scenario):
        print(f"Task {task_id} : {task_set.nb_classes} classes")
        task_set.plot(
            path="Archives/Samples/CUB200/CI",
            title="CUB200_InstanceIncremental_{}.jpg".format(task_id),
            nb_samples=100,
        )
def test_slicing_empty(start, end):
    """Slicing a scenario with an invalid (empty) range must fail."""
    train, test = gen_data()
    dummy = InMemoryDataset(*train)
    scenario = ClassIncremental(dummy, increment=2)
    has_failed = False
    try:
        # BUG FIX: the original sliced with an undefined name (`start_index`)
        # inside a bare `except:`, so the test always passed via NameError and
        # ignored both parameters. Use the actual arguments and catch only
        # real exceptions.
        scenario[start:end]
    except Exception:
        has_failed = True
    assert has_failed
def test_taskid(fake_data, class_order):
    """Every batch of task `i` is labelled with task id `i`."""
    scenario = ClassIncremental(cl_dataset=fake_data, increment=2)
    for task_id, taskset in enumerate(scenario):
        for x, y, t in DataLoader(taskset, batch_size=32):
            assert t[0].item() == task_id
            assert (t == task_id).all()