Example #1
    def _construct_exemplar_set(self,
                                task_data: Dataset,
                                dist_args: Optional[Dict] = None,
                                **kwargs) -> None:
        """
        Update the buffer with the new task exemplars, chosen randomly for each class.

        Args:
            task_data (Dataset): The new task data
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
                multiple GPUs (e.g., the rank of the device) (default: None)
        """
        distributed = dist_args is not None
        new_class_labels = task_data.cur_task

        for class_label in new_class_labels:
            num_images_to_add = min(self.n_mems_per_cla,
                                    self.max_mems_pool_size)
            if not distributed:
                class_images_indices = task_data.get_image_indices_by_cla(
                    class_label, num_images_to_add)
            else:
                raise NotImplementedError

            for image_index in class_images_indices:
                image, label1, label2 = task_data.get_item(image_index)
                if label2 != NO_LABEL_PLACEHOLDER:
                    warnings.warn(
                        f"Sample is being added to the buffer with labels {label1} and {label2}"
                    )
                self.add_sample(class_label, image, (label1, label2))
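For reference, the per-class random selection above can be sketched without the Dataset/Buffer API. A minimal, hypothetical sketch (the names indices_by_class, n_mems_per_cla, and max_mems_pool_size mirror the method above; nothing here is from the original codebase):

import numpy as np

def pick_random_exemplars(indices_by_class, n_mems_per_cla, max_mems_pool_size, seed=0):
    """Return a dict mapping each class label to a random subset of its sample indices."""
    rng = np.random.default_rng(seed)
    exemplars = {}
    for class_label, indices in indices_by_class.items():
        num_to_add = min(n_mems_per_cla, max_mems_pool_size, len(indices))
        # sample without replacement so the same image is never buffered twice
        exemplars[class_label] = rng.choice(indices, size=num_to_add, replace=False)
    return exemplars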
def test_Dataset_divide_data_across_tasks_CIL(expected_task_data_size):
    lifelong_dataset = Dataset(dataset,
                               tasks,
                               setup=CIL_SETUP,
                               essential_transforms_fn=lambda x: x)
    for task_id in range(len(tasks)):
        assert len(lifelong_dataset.task_id_to_data_idx[task_id]
                   ) == expected_task_data_size[task_id]
Example #3
    def _construct_exemplar_set(self,
                                task_data: Dataset,
                                dist_args: Optional[Dict] = None,
                                **kwargs) -> None:
        """
        Update the buffer with the new task exemplars, chosen randomly for each class.

        Args:
            task_data (Dataset): The new task data
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
                multiple GPUs (e.g., the rank of the device) (default: None)
        """
        distributed = dist_args is not None
        if distributed:
            rank = dist_args['rank']
        else:
            rank = 0
        new_class_labels = task_data.cur_task

        for class_label in new_class_labels:
            num_images_to_add = min(self.n_mems_per_cla,
                                    self.max_mems_pool_size)
            class_images_indices = task_data.get_image_indices_by_cla(
                class_label, num_images_to_add)
            if distributed:
                device = torch.device(f"cuda:{dist_args['gpu']}")
                class_images_indices_to_broadcast = torch.from_numpy(
                    class_images_indices).to(device)
                torch.distributed.broadcast(class_images_indices_to_broadcast,
                                            0)
                class_images_indices = class_images_indices_to_broadcast.cpu().numpy()

            for image_index in class_images_indices:
                image, label1, label2 = task_data.get_item(image_index)
                if label2 != NO_LABEL_PLACEHOLDER:
                    warnings.warn(
                        f"Sample is being added to the buffer with labels {label1} and {label2}"
                    )
                self.add_sample(class_label,
                                image, (label1, label2),
                                rank=rank)
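The broadcast in the distributed branch exists so that every rank buffers identical exemplars: rank 0 draws the random indices, and all other ranks overwrite their local draw with rank 0's copy. A minimal sketch of that pattern, assuming torch.distributed is already initialised (the helper name is hypothetical):

import torch
import torch.distributed as dist

def sync_indices_from_rank0(local_indices: torch.Tensor) -> torch.Tensor:
    """Make every rank use rank 0's randomly drawn indices."""
    synced = local_indices.clone()
    dist.broadcast(synced, src=0)  # overwrites the tensor in place on non-source ranks
    return synced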
def test_Dataset_divide_data_across_tasks_IIRC_Train(expected_task_data_size):
    lifelong_dataset = Dataset(dataset,
                               tasks,
                               essential_transforms_fn=lambda x: x,
                               setup=IIRC_SETUP,
                               test_mode=False,
                               superclass_data_pct=superclass_data_pct,
                               subclass_data_pct=subclass_data_pct)
    for task_id in range(len(tasks)):
        assert len(lifelong_dataset.task_id_to_data_idx[task_id]
                   ) == expected_task_data_size[task_id]
Example #5
    def _prepare_model_for_new_task(self, task_data: Dataset, dist_args: Optional[dict] = None,
                                    **kwargs) -> None:
        """
        A method-specific function that runs before the starting epoch of each new task (called from
        the prepare_model_for_task function).
        It copies the old network and freezes its gradients.
        It also extends the output layer, imprints weights for the extended nodes, and changes the
        trainable parameters.

        Args:
            task_data (Dataset): The new task dataset
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
                multiple GPUs (e.g., the rank of the device) (default: None)
        """
        self.old_net = copy_freeze(self.net)
        self.old_net.eval()

        cur_task_id = self.cur_task_id
        num_old_classes = int(sum(self.n_cla_per_tsk[: cur_task_id]))
        num_new_classes = self.n_cla_per_tsk[cur_task_id]
        device = next(self.net.parameters()).device

        # Extend last layer
        if cur_task_id > 0:
            output_layer = cosine_linear.SplitCosineLinear(in_features=self.latent_dim,
                                                           out_features1=num_old_classes,
                                                           out_features2=num_new_classes,
                                                           sigma=self.sigma).to(device)
            if cur_task_id == 1:
                output_layer.fc1.weight.data = self.net.model.output_layer.weight.data
            else:
                out_features1 = self.net.model.output_layer.fc1.out_features
                output_layer.fc1.weight.data[:out_features1] = self.net.model.output_layer.fc1.weight.data
                output_layer.fc1.weight.data[out_features1:] = self.net.model.output_layer.fc2.weight.data
            output_layer.sigma.data = self.net.model.output_layer.sigma.data
            self.net.model.output_layer = output_layer
            self.lambda_cur = self.lambda_base * math.sqrt(num_old_classes * 1.0 / num_new_classes)
            print_msg(f"Lambda for less forget is set to {self.lambda_cur}")
        elif cur_task_id < 0:
            raise ValueError("task id cannot be negative")

        # Imprint weights
        with task_data.disable_augmentations():
            if cur_task_id > 0:
                print_msg("Imprinting weights")
                self.net = self._imprint_weights(task_data, self.net, dist_args)

        # Fix parameters of FC1 for less forget and reset optimizer/scheduler
        if cur_task_id > 0:
            trainable_parameters = [param for name, param in self.net.named_parameters() if
                                    "output_layer.fc1" not in name]
        else:
            trainable_parameters = self.net.parameters()
        self.reset_optimizer_and_scheduler(trainable_parameters)
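The layer-extension step above follows a common incremental-learning pattern: build a wider output layer, copy the old class weights into its first rows, and let the new rows be initialised separately (here, by imprinting). A minimal sketch with a plain bias-free nn.Linear instead of SplitCosineLinear (hypothetical helper; the real layer also carries a learnable scale sigma):

import torch
import torch.nn as nn

def extend_output_layer(old_layer: nn.Linear, num_new_classes: int) -> nn.Linear:
    """Return a wider classifier whose first rows reuse the old class weights."""
    num_old_classes = old_layer.out_features
    new_layer = nn.Linear(old_layer.in_features,
                          num_old_classes + num_new_classes,
                          bias=False)
    with torch.no_grad():
        new_layer.weight[:num_old_classes] = old_layer.weight  # preserve old classes
    return new_layer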
def test_choose_task(expected_task_data_size):
    lifelong_dataset = Dataset(dataset,
                               tasks,
                               essential_transforms_fn=lambda x: x,
                               setup=IIRC_SETUP,
                               superclass_data_pct=superclass_data_pct,
                               subclass_data_pct=subclass_data_pct)

    for task_id in range(len(tasks)):
        lifelong_dataset.choose_task(task_id)
        assert len(lifelong_dataset) == expected_task_data_size[task_id]
        assert set(lifelong_dataset.seen_classes) == set(
            [cla for task in tasks[:task_id + 1] for cla in task])

        # test that no labels from outside the current task are given (when not using the complete
        # information mode in the case of IIRC)
        assert lifelong_dataset.cur_task == tasks[task_id]
        for i in range(len(lifelong_dataset)):
            image, label_1, label_2 = lifelong_dataset[i]
            # Check that only one label is given when not using the complete information mode (in IIRC)
            assert label_2 == NO_LABEL_PLACEHOLDER
            assert label_1 in tasks[task_id]
def test_complete_information_mode(expected_task_data_size,
                                   expected_data_up_to_size):
    lifelong_dataset = Dataset(dataset,
                               tasks,
                               essential_transforms_fn=lambda x: x,
                               setup=IIRC_SETUP,
                               superclass_data_pct=superclass_data_pct,
                               subclass_data_pct=subclass_data_pct)
    lifelong_dataset.enable_complete_information_mode()

    for task_id in range(len(tasks)):
        lifelong_dataset.choose_task(task_id)
        assert len(lifelong_dataset) == expected_task_data_size[task_id]
        assert set(lifelong_dataset.seen_classes) == set(
            [cla for task in tasks[:task_id + 1] for cla in task])

        # test that all labels that have been observed so far are given
        assert lifelong_dataset.cur_task == tasks[task_id]
        for i in range(len(lifelong_dataset)):
            image, label_1, label_2 = lifelong_dataset[i]
            assert label_1 == NO_LABEL_PLACEHOLDER or label_1 in lifelong_dataset.seen_classes
            assert label_2 == NO_LABEL_PLACEHOLDER or label_2 in lifelong_dataset.seen_classes
            if label_1 in subclasses_superclasses.keys():
                assert label_2 == subclasses_superclasses[label_1]
            elif label_2 in subclasses_superclasses.keys():
                assert label_1 == subclasses_superclasses[label_2]

        lifelong_dataset.load_tasks_up_to(task_id)
        assert len(lifelong_dataset) == expected_data_up_to_size[task_id]
        assert set(lifelong_dataset.cur_task) == set(
            [cla for task in tasks[:task_id + 1] for cla in task])
        for i in range(len(lifelong_dataset)):
            image, label_1, label_2 = lifelong_dataset[i]
            assert label_1 == NO_LABEL_PLACEHOLDER or label_1 in lifelong_dataset.seen_classes
            assert label_2 == NO_LABEL_PLACEHOLDER or label_2 in lifelong_dataset.seen_classes
            if label_1 in subclasses_superclasses.keys():
                assert label_2 == subclasses_superclasses[label_1]
            elif label_2 in subclasses_superclasses.keys():
                assert label_1 == subclasses_superclasses[label_2]
Example #8
    def _imprint_weights(self, task_data: Dataset, model: Union[ResNet, ResNetCIFAR],
                         dist_args: Optional[dict] = None) -> Union[ResNet, ResNetCIFAR]:
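        """
        Initialize the output layer weights for the new classes by imprinting: each new class weight
        is set to the normalized mean of that class's normalized latent features, rescaled to the
        average norm of the old class weights.

        Args:
            task_data (Dataset): The new task data
            model (Union[ResNet, ResNetCIFAR]): The model whose new output nodes should be imprinted
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
                multiple GPUs (e.g., the rank of the device) (default: None)

        Returns:
            Union[ResNet, ResNetCIFAR]: The model with imprinted weights for the new classes
        """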
        distributed = dist_args is not None
        if distributed:
            device = torch.device(f"cuda:{dist_args['gpu']}")
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        class_names = task_data.cur_task
        class_names_2_idx = self.class_names_to_idx
        model.eval()
        num_old_classes = model.model.output_layer.fc1.out_features
        old_weights_norm = model.model.output_layer.fc1.weight.data.norm(dim=1, keepdim=True)
        average_old_weights_norm = torch.mean(old_weights_norm, dim=0)
        new_weights = torch.zeros_like(model.model.output_layer.fc2.weight.data)
        for cla in class_names:
            cla_id = class_names_2_idx[cla]
            if cla_id < num_old_classes:
                continue
            num_samples = 1000
            class_indices = task_data.get_image_indices_by_cla(cla, num_samples=num_samples, shuffle=False)
            if distributed:  # make sure all the GPUs use the same random indices
                class_data_indices_to_broadcast = torch.from_numpy(class_indices).to(device)
                torch.distributed.broadcast(class_data_indices_to_broadcast, 0)
                class_indices = class_data_indices_to_broadcast.cpu().numpy()
            sampler = SubsetSampler(class_indices)
            class_loader = DataLoader(task_data, batch_size=self.batch_size, sampler=sampler)
            normalized_latent_feat = []
            with torch.no_grad():
                for minibatch in class_loader:
                    inputs = minibatch[0].to(device)
                    output, latent_features = model(inputs)
                    latent_features = latent_features.detach()
                    latent_features = F.normalize(latent_features, p=2, dim=-1)
                    normalized_latent_feat.append(latent_features)
                normalized_latent_feat = torch.cat(normalized_latent_feat, dim=0)
                mean_latent_feat = torch.mean(normalized_latent_feat, dim=0)
                normalized_mean_latent = F.normalize(mean_latent_feat, p=2, dim=0)
                new_weights[cla_id - num_old_classes] = normalized_mean_latent * average_old_weights_norm
        model.model.output_layer.fc2.weight.data = new_weights
        return model
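Weight imprinting, as used above, initialises each new class vector as the re-normalised mean of that class's normalised latent features, rescaled to the average norm of the old class weights, so that new and old logits start on a comparable scale. A minimal tensor-level sketch (hypothetical helper; features is an (N, D) batch of latent features for one new class):

import torch
import torch.nn.functional as F

def imprint_class_weight(features: torch.Tensor, old_weights: torch.Tensor) -> torch.Tensor:
    """Compute an imprinted weight vector for one new class."""
    avg_old_norm = old_weights.norm(dim=1).mean()       # average old class weight norm
    normalized = F.normalize(features, p=2, dim=-1)     # unit-normalise each feature
    mean_direction = F.normalize(normalized.mean(dim=0), p=2, dim=0)
    return mean_direction * avg_old_norm                # match the old weight scale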
Example #9
    def _construct_exemplar_set(self,
                                task_data: Dataset,
                                dist_args: Optional[dict] = None,
                                model: torch.nn.Module = None,
                                batch_size=1,
                                **kwargs):
        """
        Update the buffer with the new task samples using herding

        Args:
            task_data (Dataset): The new task data
            dist_args (Optional[Dict]): a dictionary of the distributed processing values in case of
                multiple GPUs (e.g., the rank of the device) (default: None)
            model (BaseMethod): The current method object, used to compute the latent features
            batch_size (int): The minibatch size
        """
        distributed = dist_args is not None
        if distributed:
            device = torch.device(f"cuda:{dist_args['gpu']}")
            rank = dist_args['rank']
        else:
            device = torch.device(
                'cuda' if torch.cuda.is_available() else 'cpu')
            rank = 0
        new_class_labels = task_data.cur_task
        model.eval()

        # disable augmentations within this context, then restore them (if they were enabled)
        with task_data.disable_augmentations():
            with torch.no_grad():
                for class_label in new_class_labels:
                    class_data_indices = task_data.get_image_indices_by_cla(
                        class_label, self.max_mems_pool_size)
                    if distributed:  # make sure all the GPUs use the same exemplar indices
                        class_data_indices_to_broadcast = torch.from_numpy(class_data_indices).to(device)
                        dist.broadcast(class_data_indices_to_broadcast, 0)
                        class_data_indices = class_data_indices_to_broadcast.cpu().numpy()
                    sampler = SubsetSampler(class_data_indices)
                    class_loader = DataLoader(task_data,
                                              batch_size=batch_size,
                                              sampler=sampler)
                    latent_vectors = []
                    for minibatch in class_loader:
                        images = minibatch[0].to(device)
                        output, out_latent = model.forward_net(images)
                        out_latent = out_latent.detach()
                        out_latent = F.normalize(out_latent, p=2, dim=-1)
                        latent_vectors.append(out_latent)
                    latent_vectors = torch.cat(latent_vectors, dim=0)
                    class_mean = torch.mean(latent_vectors, dim=0)

                    chosen_exemplars_ind = []
                    exemplars_mean = torch.zeros_like(class_mean)
                    while len(chosen_exemplars_ind) < min(
                            self.n_mems_per_cla, len(class_data_indices)):
                        potential_exemplars_mean = (exemplars_mean.unsqueeze(0) * len(chosen_exemplars_ind) + latent_vectors) \
                                                   / (len(chosen_exemplars_ind) + 1)
                        distance = (class_mean.unsqueeze(0) -
                                    potential_exemplars_mean).norm(dim=-1)
                        shuffled_index = torch.argmin(distance).item()
                        exemplars_mean = potential_exemplars_mean[
                            shuffled_index, :].clone()
                        exemplar_index = class_data_indices[shuffled_index]
                        chosen_exemplars_ind.append(exemplar_index)
                        latent_vectors[shuffled_index, :] = float("inf")

                    for image_index in chosen_exemplars_ind:
                        image, label1, label2 = task_data.get_item(image_index)
                        if label2 != NO_LABEL_PLACEHOLDER:
                            warnings.warn(
                                f"Sample is being added to the buffer with labels {label1} and {label2}"
                            )
                        self.add_sample(class_label,
                                        image, (label1, label2),
                                        rank=rank)
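Herding, as implemented above, greedily picks at each step the sample whose inclusion moves the running exemplar mean closest to the true class mean. A standalone sketch on raw feature vectors (hypothetical helper; assumes features is an (N, D) tensor of L2-normalised latent vectors):

import torch

def herding_selection(features: torch.Tensor, num_exemplars: int) -> list:
    """Greedy herding: return row indices whose running mean best tracks the class mean."""
    class_mean = features.mean(dim=0)
    running_mean = torch.zeros_like(class_mean)
    work = features.clone()
    chosen = []
    for _ in range(min(num_exemplars, len(features))):
        # mean of the chosen set if each remaining candidate were added next
        candidate_means = (running_mean * len(chosen) + work) / (len(chosen) + 1)
        best = torch.argmin((class_mean - candidate_means).norm(dim=-1)).item()
        running_mean = candidate_means[best].clone()
        chosen.append(best)
        work[best] = float("inf")  # exclude this row from later rounds
    return chosen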
def test_get_shuffled_image_indices(class1, class2, class1_size0,
                                    class1_size01, class2_size1,
                                    class2_size01):
    lifelong_dataset = Dataset(dataset,
                               tasks,
                               essential_transforms_fn=lambda x: x,
                               setup=IIRC_SETUP,
                               superclass_data_pct=superclass_data_pct,
                               subclass_data_pct=subclass_data_pct)
    assert class1 in tasks[0]
    assert class2 in tasks[1]

    lifelong_dataset.choose_task(0)
    class1_indices = lifelong_dataset.get_image_indices_by_cla(class1)
    assert len(class1_indices) == class1_size0
    for idx in class1_indices:
        image, label_1, label_2 = lifelong_dataset[idx]
        assert label_1 == class1
        assert label_2 == NO_LABEL_PLACEHOLDER

    lifelong_dataset.choose_task(1)
    class2_indices = lifelong_dataset.get_image_indices_by_cla(class2)
    assert len(class2_indices) == class2_size1
    for idx in class2_indices:
        image, label_1, label_2 = lifelong_dataset[idx]
        assert label_1 == class2
        assert label_2 == NO_LABEL_PLACEHOLDER

    lifelong_dataset.enable_complete_information_mode()
    lifelong_dataset.load_tasks_up_to(1)
    class1_indices = lifelong_dataset.get_image_indices_by_cla(class1)
    class2_indices = lifelong_dataset.get_image_indices_by_cla(class2)
    assert len(class1_indices) == class1_size01
    assert len(class2_indices) == class2_size01
    for idx in class1_indices:
        image, label_1, label_2 = lifelong_dataset[idx]
        if label_1 == class1:
            assert label_2 == class2 or label_2 == NO_LABEL_PLACEHOLDER
        elif label_2 == class1:
            assert label_1 == class2 or label_1 == NO_LABEL_PLACEHOLDER
        else:
            raise ValueError(f"{class1} is not there")
    for idx in class2_indices:
        image, label_1, label_2 = lifelong_dataset[idx]
        if label_1 == class2:
            assert label_2 == class1
        elif label_2 == class2:
            assert label_1 == class1
        else:
            raise ValueError(f"{class2} is not there")