Exemplo n.º 1
    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """Get a `Loss` for the given forward pass and resulting rewards/labels.

        Take a look at the `AuxiliaryTask` class for more info.

        NOTE: This is the same simplified version of EWC used throughout the
        other examples: the loss is the P-norm between the current weights and
        the weights as they were at the beginning of the task.

        Also note, this particular example doesn't actually use the provided
        arguments.

        Args:
            forward_pass: Results of the forward pass (not read here).
            y: Labels/rewards for the batch (not read here).

        Returns:
            An empty `Loss` named `self.name` while `self.previous_task` is
            None; otherwise a `Loss` whose value is the sum, over the compared
            parameters, of the distance between the current and stored weights.
        """
        if self.previous_task is None:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.model.named_parameters())

        loss = 0.0
        # NOTE(review): both calls below were truncated in this snippet. The
        # second dict and the second tensor are restored from the unpacked
        # names; presumably `dict_intersection` yields (key, (a, b)) for keys
        # found in both dicts. Any norm-order argument to `torch.dist` was
        # lost, so the default (p=2) is used here -- confirm against the
        # original implementation.
        for _weight_name, (new_w, old_w) in dict_intersection(new_weights,
                                                              old_weights):
            loss += torch.dist(new_w, old_w)

        ewc_loss = Loss(name=self.name, loss=loss)
        return ewc_loss
Exemplo n.º 2
 def get_loss(self,
              x: Tensor,
              h_x: Tensor,
              y_pred: Tensor,
              y: Tensor = None) -> Loss:
     """Return a `Loss` wrapping `self.penalty(y_pred, y)`.

     Args:
         x: Input batch (not read here).
         h_x: Hidden representation; only used to recompute the predictions
             when `y_pred` does not require grad.
         y_pred: Predictions for the batch.
         y: Labels. When None, an empty `Loss` is returned.

     Returns:
         A `Loss` built from the penalty value, or an empty `Loss` if there
         are no labels.
     """
     if y is None:
         return Loss()

     if y_pred.requires_grad:
         return Loss(self.penalty(y_pred, y))

     # Can't evaluate the IRM score when the y_pred doesn't require grad, so
     # redo the classifier pass with gradient tracking switched back on.
     with torch.enable_grad():
         tracked_pred = self.classifier(h_x)
         penalty_value = self.penalty(tracked_pred, y)
         return Loss(penalty_value)
Exemplo n.º 3
    def get_loss(self,
                 x: Tensor,
                 h_x: Tensor,
                 y_pred: Tensor,
                 y: Tensor = None) -> Loss:
        """Consistency loss between mixed predictions and predictions on mixed codes.

        Consecutive samples are paired (index 0 with 1, 2 with 3, ...). For each
        pair, the hidden codes and the predictions are both mixed with the same
        random coefficient. The loss is the distance between the mixed
        predictions and the classifier's output on the mixed codes.

        Args:
            x: Input batch; only its batch size, dtype and device are read.
            h_x: Hidden codes for the batch.
            y_pred: Predictions for the batch.
            y: Labels (not read here).

        Returns:
            A `Loss` carrying the distance described above.
        """
        batch_size = x.shape[0]
        # Samples are mixed in pairs, so drop the last one of an odd batch.
        if batch_size % 2 != 0:
            h_x = h_x[:-1]
            y_pred = y_pred[:-1]
        # One random mixing coefficient per pair.
        mix_coeff = torch.rand(batch_size // 2, dtype=x.dtype, device=x.device)

        h1 = h_x[0::2]
        h2 = h_x[1::2]
        mix_h_x = mixup(h1, h2, mix_coeff)

        y_pred_1 = y_pred[0::2]
        y_pred_2 = y_pred[1::2]
        y_pred_mix = mixup(y_pred_1, y_pred_2, mix_coeff)

        mix_y_pred = self.classifier(mix_h_x)

        loss = torch.dist(y_pred_mix, mix_y_pred)
        # NOTE(review): the arguments of this `Loss(...)` call were truncated in
        # this snippet. They are restored following the
        # `Loss(name=self.name, loss=...)` pattern used by the sibling tasks;
        # confirm whether the original passed anything else.
        loss_info = Loss(name=self.name, loss=loss)
        return loss_info
Exemplo n.º 4
    def get_loss(self,
                 forward_pass: Dict[str, Tensor],
                 y: Tensor = None) -> Loss:
        """Build the VAE loss (reconstruction + weighted KL) for a forward pass.

        Args:
            forward_pass: Must contain the inputs under "x" and the hidden
                codes under "h_x".
            y: Labels (not read here).

        Returns:
            A `Loss` named `self.name` that stores the intermediate tensors
            (mu, logvar, z, x_hat), with a "recon" loss and a "kl" loss added
            to it.
        """
        inputs = forward_pass["x"]
        codes = forward_pass["h_x"]
        # Collapse everything but the batch dimension.
        flat_codes = codes.view([codes.shape[0], -1])

        mu = self.mu(flat_codes)
        logvar = self.logvar(flat_codes)
        z = self.reparameterize(mu, logvar)
        x_hat = self.decoder(z)

        recon_term = self.reconstruction_loss(x_hat, inputs)
        kl_term = self.options.beta * self.kl_divergence_loss(mu, logvar)

        debug_tensors = dict(mu=mu, logvar=logvar, z=z, x_hat=x_hat)
        total = Loss(self.name, tensors=debug_tensors)
        total += Loss("recon", loss=recon_term)
        total += Loss("kl", loss=kl_term)
        return total
Exemplo n.º 5
    def get_loss_for_arg(self, x: Tensor, h_x: Tensor, fn_arg: Any, alpha: Tensor) -> Loss:
        """Get the auxiliary loss for one argument of the transformation.

        The input is transformed with `self.function(x, fn_arg)` and encoded.
        The auxiliary layer then predicts the transformation argument from the
        code (optionally concatenated with the original code), and that
        prediction is scored against `alpha` with `self.loss`.

        Args:
            x: Input batch.
            h_x: Hidden codes of the untransformed inputs; only used when
                `self.options.compare_with_original` is set.
            fn_arg: Argument passed to the transformation function. Also used
                to name the resulting `Loss`.
            alpha: Target for the auxiliary layer; moved to `x`'s device.

        Returns:
            A `Loss` named after `fn_arg`, holding the loss value, the metrics
            under that same name, and the tensors `x_t`, `h_x_t`, `alpha_t`.
        """
        alpha = alpha.to(x.device)
        # TODO: Transform before or after the `preprocess_inputs` function?
        x = fix_channels(x)
        # Transform X using the function.
        x_t = self.function(x, fn_arg)
        # Get the code for the transformed x.
        h_x_t = self.encode(x_t)

        aux_layer_input = h_x_t
        if self.options.compare_with_original:
            aux_layer_input = torch.cat([h_x, h_x_t], dim=-1)

        # Get the predicted argument of the transformation.
        alpha_t = self.auxiliary_layer(aux_layer_input)
        # get the metrics for this particular argument (accuracy, mse, etc.)
        # Integer arguments are named as-is, anything else with three
        # decimals. The `else` was missing: ints were always renamed with the
        # float format and non-int arguments left `name` undefined.
        if isinstance(fn_arg, int):
            name = f"{fn_arg}"
        else:
            name = f"{fn_arg:.3f}"
        loss = Loss(name)
        loss.loss = self.loss(alpha_t, alpha)
        loss.metrics[name] = get_metrics(x=x_t, h_x=h_x_t, y_pred=alpha_t, y=alpha)
        # Save some tensors for debugging purposes:
        loss.tensors["x_t"] = x_t
        loss.tensors["h_x_t"] = h_x_t
        loss.tensors["alpha_t"] = alpha_t
        return loss
Exemplo n.º 6
    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """ Gets the EWC loss.

        Args:
            forward_pass: Results of the forward pass. Not read by the code
                visible in this snippet.
            y: Labels/rewards for the batch. Not read by the code visible in
                this snippet.

        Returns:
            An empty `Loss` named `self.name` when there is no previous task,
            the task is not enabled, or `self._shared_net` is None. Otherwise a
            `Loss` holding the sum of `fim.vTMv(diff)` over the stored Fisher
            information matrices, where `diff` is the current parameter vector
            minus `self.previous_model_weights`.
        """
        # NOTE(review): the body of this `if` is missing from this snippet
        # (lines were lost during extraction); restore it from the original
        # implementation before using this code.
        if self._model.training:

        # Nothing to regularize against: no previous task, task disabled, or
        # no shared network.
        if self.previous_task is None or not self.enabled or self._shared_net is None:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        loss = 0.0
        # Snapshot of the current parameters of the shared network.
        v_current = PVector.from_model(self._shared_net)

        for fim in self.fisher_information_matrices:
            # NOTE(review): `diff` does not depend on `fim`; it could be
            # computed once before the loop.
            diff = v_current - self.previous_model_weights
            loss += fim.vTMv(diff)
        # Counter incremented on every call that reaches this point.
        self._i += 1
        ewc_loss = Loss(name=self.name, loss=loss)
        return ewc_loss
Exemplo n.º 7
    def get_loss(self,
                 x: Tensor,
                 h_x: Tensor,
                 y_pred: Tensor,
                 y: Tensor = None) -> Loss:
        """Mixup consistency loss computed with the mean-teacher predictions.

        NOTE(review): this snippet lost several lines during extraction (at
        least two `else:` lines and the trailing arguments of three calls), so
        it does not parse as-is. The comments below mark each gap; restore the
        missing pieces from the original implementation.

        Args:
            x: Input batch. The last sample is dropped when the batch is odd.
            h_x: Hidden codes; overwritten with `self.mean_encode(x)` inside
                the consistency branch.
            y_pred: Predictions; the last one is dropped when the batch is odd.
            y: Labels (not read by the visible code).

        Returns:
            The `loss` variable; see the NOTE(review) near the end about what
            that variable holds.
        """
        # select only unlabelled examples like in ICT: https://arxiv.org/pdf/1903.03825.pdf
        # TODO: fix this, y may be None, which would break this.

        batch_size = x.shape[0]
        # assert batch_size % 2  == 0, f"Can only mix an even number of samples. (batch size is {batch_size})"
        # NOTE(review): `batch_size` is not updated after trimming, so the
        # division by `batch_size` further down uses the untrimmed size.
        if batch_size % 2 != 0:
            x = x[:-1]
            y_pred = y_pred[:-1]

        loss = Loss(name=self.name)

        # NOTE(review): an `else:` line appears to be missing between the two
        # assignments below, and the arguments of
        # `get_current_consistency_weight(` were truncated.
        if self.epoch_in_task < self.options.consistency_rampup_starts:
            mixup_consistency_weight = 0.0
            mixup_consistency_weight = self.get_current_consistency_weight(
        if batch_size > 0 and mixup_consistency_weight > 0:
            #mix_coeff = torch.rand(batch_size//2, dtype=x.dtype, device=x.device)

            #x1 = x[0::2]
            #x2 = x[1::2]

            #mix_x = mixup(x1, x2, mix_coeff)

            #y_pred_1 = y_pred[0::2]
            #y_pred_2 = y_pred[1::2]

            # Targets come from the mean-teacher encoder/logits and are
            # detached so that no gradient flows through them.
            h_x = self.mean_encode(x)
            y_pred_ema = self.mean_logits(h_x)
            # NOTE(review): the remaining arguments of `mixup_data(` and its
            # closing parenthesis were truncated.
            mix_x, y_pred_mix, lam = mixup_data(
                x, Variable(y_pred_ema.detach().data, requires_grad=False),

            loss.tensors["mix_x"] = mix_x.detach()
            # The mixed inputs go through the regular encoder and classifier.
            mix_h_x = self.encode(mix_x)
            mix_y_pred = self.classifier(mix_h_x)

            # Use the mean teacher to get the h_x and y_pred for the unlabeled data.
            #h_x = self.mean_encode(x)
            #y_pred = self.mean_logits(h_x)
            #y_pred_mix = mixup(y_pred_1, y_pred_2, mix_coeff)

            loss.tensors["y_pred_mix"] = y_pred_mix.detach()
            # NOTE(review): this rebinds `loss` from the `Loss` object created
            # above to the criterion's result, so the `.total_loss` assignment
            # below is set on that result and the tensors stored on the `Loss`
            # object are no longer reachable through `loss`. Looks
            # unintentional -- confirm.
            loss = self.consistency_criterion(mix_y_pred,
                                              y_pred_mix) / batch_size  #
            #loss = torch.dist(y_pred_mix, mix_y_pred)
            loss.total_loss = mixup_consistency_weight * loss
            # NOTE(review): this line presumably belongs to a missing `else:`
            # branch of the `if batch_size > 0 ...` above; the arguments of
            # `torch.zeros(` were truncated as well.
            loss.total_loss = torch.zeros(1,
        return loss
Exemplo n.º 8
    def get_loss(self,
                 forward_pass: ForwardPass,
                 rewards: Rewards = None,
                 loss_name: str = "") -> Loss:
        """Gets a Loss given the results of the forward pass and the reward.

        Args:
            forward_pass (ForwardPass): Results of the forward pass.
            rewards (Rewards, optional): The reward that resulted from the
                action chosen in the forward pass. Defaults to None.
            loss_name (str, optional): The name for the resulting Loss.
                Defaults to "", but an empty name is rejected by the assert
                below, so callers have to pass one.

        Returns:
            Loss: a Loss object containing the loss tensor, associated metrics
            and sublosses.

        Raises:
            AssertionError: If `loss_name` is empty, or if `rewards` is given
                but `rewards.y` is None.

        This could look a bit like this, for example:
        ```
        action = forward_pass["action"]
        predicted_reward = forward_pass["predicted_reward"]
        nce = self.loss_fn(predicted_reward, reward)
        loss = Loss(loss_name, loss=nce)
        return loss
        ```
        """
        assert loss_name
        # Create an 'empty' Loss object with the given name, so that we always
        # return a Loss object, even when `y` is None and we can't get the loss
        # from the output_head.
        total_loss = Loss(name=loss_name)
        if rewards:
            assert rewards.y is not None
            # TODO: If we decide to re-organize the forward pass object to also
            # contain the predictions of the self-supervised tasks, (atm they
            # perform their 'forward pass' in their get_loss functions)
            # then we could change 'actions' to be a dict, and index the
            # dict with the 'name' of each output head, like so:
            # actions_of_head = forward_pass.actions[self.output_head.name]
            # rewards_of_head = forward_pass.rewards[self.output_head.name]

            # For now though, we only have one "prediction" in the actions:
            actions = forward_pass.actions
            # So far we only use 'y' from the rewards in the output head.
            # NOTE(review): the trailing arguments of this call were truncated
            # in this snippet. `actions` and `rewards` are restored from the
            # surrounding code; confirm the keyword names against
            # `output_head_loss`'s signature.
            supervised_loss = self.output_head_loss(forward_pass,
                                                    actions=actions,
                                                    rewards=rewards)
            total_loss += supervised_loss

        return total_loss
Exemplo n.º 9
def get_knn_performance(
    x_t: np.ndarray,
    y_t: np.ndarray,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
    loss_name: str = "KNN",
) -> Loss:
    """Evaluate an already-fitted KNN classifier on the given samples.

    Args:
        x_t: Samples to evaluate; flattened to 2D (one row per sample).
        y_t: True labels for `x_t`.
        scaler: Fitted scaler applied to the flattened samples.
        knn_classifier: Fitted classifier used to predict class probabilities.
        num_classes: Expected number of classes. When the classifier reports a
            different number of columns, the predictions are expanded into an
            array with `max(num_classes, n_columns)` columns.
        loss_name: Name given to the returned `Loss`.

    Returns:
        A `Loss` holding the log-loss of the predictions, the (possibly
        expanded) predicted scores and the true labels.

    Raises:
        AssertionError: If the classifier's `classes_` are not sorted.
    """
    # Flatten the inputs to two dimensions only.
    x_t = x_t.reshape(x_t.shape[0], -1)
    assert len(x_t.shape) == 2
    x_t = scaler.transform(x_t)
    y_t_prob = knn_classifier.predict_proba(x_t)

    classes = knn_classifier.classes_
    # make sure the classes are sorted:
    assert np.array_equal(sorted(classes), classes)

    if y_t_prob.shape[-1] == num_classes:
        y_t_logits = y_t_prob
    else:
        # Not all classes were encountered, so we need to 'expand' the predicted
        # logits to the right shape. (The `else` was missing, so this branch
        # also ran, and overwrote `y_t_logits`, when the shapes matched.)
        logger.info(f"{y_t_prob.shape} {num_classes}")
        num_classes = max(num_classes, y_t_prob.shape[-1])

        # NOTE(review): the end of this `np.zeros(` call was truncated in this
        # snippet; the probabilities' dtype is used here -- confirm.
        y_t_logits = np.zeros([y_t_prob.shape[0], num_classes],
                              dtype=y_t_prob.dtype)

        # Scatter each predicted column to the column of its class label.
        # NOTE(review): the `label - 1` offset is kept from the original loop;
        # it only lines up for 1-based labels (a label of 0 would land in the
        # last column) -- confirm the label convention.
        y_t_logits[:, np.asarray(classes) - 1] = y_t_prob

    ## We were constructing this to reorder the classes in case the ordering was
    ## not the same between the KNN's internal `classes_` attribute and the task
    ## classes, However I'm not sure if this is necessary anymore.

    # y_t_logits = np.zeros((y_t.size, y_t.max() + 1))
    # for i, label in enumerate(classes):
    #     y_t_logits[:, label] = y_t_prob[:, i]

    # We get the Negative Cross Entropy using the scikit-learn function, but we
    # could instead get it using pytorch's function (maybe even inside the
    # Loss object!)
    nce_t = log_loss(y_true=y_t, y_pred=y_t_prob, labels=classes)
    # BUG: There is sometimes a case where some classes aren't present in
    # `classes_`, and as such the ClassificationMetrics object created in the
    # Loss constructor has an error.
    test_loss = Loss(loss_name, loss=nce_t, y_pred=y_t_logits, y=y_t)
    return test_loss
Exemplo n.º 10
 def get_loss(self,
              forward_pass: Dict[str, Tensor],
              y: Tensor = None) -> Loss:
     """Reconstruction loss of the auto-encoder for a forward pass.

     Args:
         forward_pass: Must contain the inputs under "x" and the hidden codes
             under "h_x".
         y: Labels (not read here).

     Returns:
         A `Loss` named `self.name` carrying the reconstruction loss.

     Raises:
         AssertionError: If the reconstruction's shape differs from the
             input's shape.
     """
     x = forward_pass["x"]
     codes = forward_pass["h_x"]
     # Collapse everything but the batch dimension.
     flat_codes = codes.view([codes.shape[0], -1])

     # (Re)create the decoder whenever there is none yet or its output shape
     # no longer matches the inputs.
     needs_new_decoder = (self.decoder is None
                          or self.decoder.output_shape != x.shape)
     if needs_new_decoder:
         self.decoder = self.create_decoder(x.shape)

     x_hat = self.decoder(flat_codes)
     assert x_hat.shape == x.shape, (
         f"reconstructed x should have same shape as original x! "
         f"({x_hat.shape} != {x.shape})")

     return Loss(name=self.name, loss=self.reconstruction_loss(x_hat, x))
Exemplo n.º 11
    def get_loss(self,
                 forward_pass: Dict[str, Tensor],
                 y: Tensor = None) -> Loss:
        """Contrastive loss on augmented views of the inputs.

        Each input is augmented on the CPU and the results are concatenated,
        encoded, flattened and projected. The projections are scored with
        `self.loss` at temperature `self.hparams.xent_temp`, and the value is
        also logged on the model as "simclr_loss".

        Args:
            forward_pass: Must contain the inputs under "x".
            y: Labels (not read here).

        Returns:
            A `Loss` named `self.name` carrying the contrastive loss.
        """
        x = forward_pass["x"]
        # TODO: is there a more efficient way to do this than with a list
        # comprehension? (torch multiprocessing-ish?)
        augmented_views = [self.augment(sample) for sample in x.cpu()]
        x_t = torch.cat(augmented_views, dim=0)  # [2*B, C, H, W]

        encoded = self.encode(x_t.to(self.device))
        h_t = encoded.flatten(start_dim=1)  # [2*B, repr_dim]
        z = self.projector(h_t)  # [2*B, proj_dim]

        contrastive_loss = self.loss(z, self.hparams.xent_temp)
        loss_object = Loss(name=self.name, loss=contrastive_loss)

        self.model.log("simclr_loss", loss_object.loss, prog_bar=True)
        return loss_object
Exemplo n.º 12
    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor=None, y: Tensor=None) -> Loss:
        """Accumulate the auxiliary loss over every transformation argument.

        Calls `get_loss_for_arg` once per `(fn_arg, alpha)` pair and adds each
        result to a `Loss` named `self.name`. All metrics collected on that
        loss are then summed and stored under `self.name` as the overall
        metric.

        Args:
            x: Input batch.
            h_x: Hidden codes for the batch.
            y_pred: Predictions (not read here).
            y: Labels (not read here).

        Returns:
            The accumulated `Loss`.

        Raises:
            AssertionError: If `self.alphas` or `self.function_args` is None.
        """
        loss_info = Loss(self.name)
        assert self.alphas is not None, "set the `self.alphas` attribute in the base class."
        assert self.function_args is not None, "set the `self.function_args` attribute in the base class."

        # Get the loss for each transformation argument.
        for fn_arg, alpha in zip(self.function_args, self.alphas):
            loss_info += self.get_loss_for_arg(x=x, h_x=h_x, fn_arg=fn_arg, alpha=alpha)

        # Fuse all the sub-metrics into a total metric.
        # For instance, all the "rotate_0", "rotate_90", "rotate_180", etc.
        # are added up to get the "overall" metric.
        metrics = loss_info.metrics
        metrics[self.name] = sum(metrics.values(), Metrics())
        return loss_info
Exemplo n.º 13
    def get_loss(self,
                 forward_pass: Dict[str, Tensor],
                 rewards: Optional[Rewards] = None,
                 loss_name: str = "") -> Loss:
        """Trains the model on a batch of (potentially partially labeled) data.

        NOTE(review): this snippet lost several lines during extraction (the
        closing braces of two dict comprehensions and the arguments of two
        `super().get_loss(` calls), so it does not parse as-is. The comments
        below mark each gap.

        Args:
            forward_pass (Dict[str, Tensor]): WIP: The results of the forward
                pass (processed input, predictions, etc.)
            rewards (Union[Optional[Tensor], List[Optional[Tensor]]]):
                Labels associated with the data. Can either be:
                - None: fully unlabeled batch
                - Tensor: fully labeled batch
                - List[Optional[Tensor]]: Partially labeled batch.
            loss_name (str, optional): Name of the resulting loss object.
                Defaults to "".

        Returns:
            Loss: a loss object made from both the unsupervised and
                supervised losses.
        """
        # TODO: We could also just use '-1' instead as the 'no-label' val: this
        # would make it a bit simpler than having both numpy arrays and tensors
        # in the batch

        # NOTE(review): `rewards` defaults to None, in which case `rewards.y`
        # raises AttributeError before the "fully unlabeled" branch below can
        # be reached -- confirm callers always pass a rewards object.
        y: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = rewards.y
        if y is None or all(y_i is not None for y_i in y):
            # Fully labeled/unlabeled batch
            # NOTE: Tensors can't have None items, so if we get a Tensor that
            # means that we have all task labels.
            # NOTE(review): `labeled_ratio` is not used before the return below.
            labeled_ratio = float(y is not None)
            return super().get_loss(forward_pass, rewards, loss_name=loss_name)

        # Boolean mask: True where the sample has a label.
        is_labeled: np.ndarray = np.asarray([y_i is not None for y_i in y])

        # Batch is maybe a mix of labeled / unlabeled data.
        # NOTE(review): indexing with a boolean mask presumably requires `y`
        # to be an array-like rather than a plain list -- confirm.
        labeled_y = y[is_labeled]
        # TODO: Might have to somehow re-order the results based on the indices?
        # TODO: Join (merge) the metrics? or keep them separate?
        # NOTE(review): the closing `}` of both comprehensions below is missing
        # from this snippet.
        labeled_forward_pass = {
            k: v[is_labeled]
            for k, v in forward_pass.items()
        unlabeled_forward_pass = {
            k: v[~is_labeled]
            for k, v in forward_pass.items()

        labeled_ratio = len(labeled_y) / len(y)
        logger.debug(f"Labeled ratio: {labeled_ratio}")

        # Create the 'total' loss for the batch, with the required name.
        # We will then create two 'sublosses', one named 'unsupervised' and one
        # named 'supervised', each containing the respective losses and metrics.
        # TODO: Make sure that this doesn't make it harder to get the metrics
        # from the Loss object. If it does, then we could maybe just fuse the
        # labeled and unlabeled losses and metrics, but that might also cause
        # issues.
        loss = Loss(name=loss_name)
        # NOTE(review): a dict is truthy as soon as it has keys, even when
        # every selected value is empty -- confirm this is the intended test.
        if unlabeled_forward_pass:
            # TODO: Setting a different loss name for this is definitely going to cause trouble!
            # NOTE(review): the arguments of this call were truncated.
            unsupervised_loss = super().get_loss(
            loss += unsupervised_loss

        if labeled_forward_pass:
            # NOTE(review): the arguments of this call were truncated.
            supervised_loss = super().get_loss(
            loss += supervised_loss

        return loss
Exemplo n.º 14
    def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
        """ Evaluate the representations with a KNN in the context of CL.

        We shorten the train dataloaders to take only the first
        `knn_samples` samples in order to save some compute.
        TODO: Figure out a way to cleanly add the metrics from the callback to
        the ``log dict'' which is returned by the model. Right now they are
        only printed / logged to wandb directly from here.

        NOTE(review): this snippet lost many lines during extraction (the
        arguments of several calls, a `logger.info(` opener, and the statements
        that accumulate `loss_i` into the totals), so it does not parse as-is.
        The comments below mark each gap.

        Args:
            model: The model to evaluate; its `datamodule` must be a
                `ClassIncrementalSetting`.

        Returns:
            The `(total_valid_loss, total_test_loss)` pair, named "knn/valid"
            and "knn/test".

        Raises:
            AssertionError: If `model.datamodule` is not a
                `ClassIncrementalSetting`.
        """
        setting = model.datamodule
        assert isinstance(setting, Setting)
        # TODO: Remove this if we want to use this for something else than a
        # Continual setting in the future.
        assert isinstance(setting, ClassIncrementalSetting)
        num_classes = setting.num_classes

        # Check whether the method has access to the task labels at train/test time.
        task_labels_at_test_time: bool = False
        from sequoia.settings import TaskIncrementalSetting
        if isinstance(setting, TaskIncrementalSetting):
            if setting.task_labels_at_test_time:
                task_labels_at_test_time = True
        # TODO: Figure out a way to make sure that we get at least one example
        # of each class to fit the KNN.
        # Both attributes below are overwritten on `self` by this call.
        self.knn_samples = max(self.knn_samples, num_classes**2)
        self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)
        logger.info(f"number of classes: {num_classes}")
        logger.info(f"Number of KNN samples: {self.knn_samples}")
        # NOTE(review): the call that wrapped the f-string below (presumably a
        # logging call) and its closing parenthesis are missing.
            f"Taking a maximum of {self.max_num_batches} batches from each dataloader."

        # NOTE(review): the remaining arguments of the three
        # `get_dataloaders(` calls below were truncated.
        train_loaders: List[DataLoader] = self.get_dataloaders(model,
        valid_loaders: List[DataLoader] = self.get_dataloaders(model,
        test_loaders: List[DataLoader] = self.get_dataloaders(model,

        # Only take the first `knn_samples` samples from each dataloader.
        def shorten(dataloader: DataLoader):
            return take(dataloader, n=self.max_num_batches)

        if self.max_num_batches:
            train_loaders = list(map(shorten, train_loaders))
            valid_loaders = list(map(shorten, valid_loaders))
            test_loaders = list(map(shorten, test_loaders))

        # Create an iterator that alternates between each of the train dataloaders.
        # NOTE: we shortened each of the dataloaders just to be sure that we get at least
        # (the end of the sentence above is missing from this snippet)
        train_loader = roundrobin(*train_loaders)

        # NOTE(review): an argument between `model=model,` and `description=`
        # is missing, and the arguments of `fit_knn(` were truncated.
        h_x, y = get_hidden_codes_array(model=model,
                                        description="KNN (Train)")
        train_loss, scaler, knn_classifier = fit_knn(x=h_x,
        logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
        total_valid_loss = Loss("knn/valid")

        # Save the current task ID so we can reset it after testing.
        starting_task_id = model.setting.current_task_id

        for i, dataloader in enumerate(valid_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)
            # NOTE(review): the arguments of `evaluate(` were truncated, and
            # the statement that merges `loss_i` into `total_valid_loss`
            # (described by the comment below) is missing.
            loss_i = evaluate(model=model,
            # We use `.absorb(loss_i)` here so that the metrics get merged.
            # That way, if we access `total_valid_loss.accuracy`, this gives the
            # accuracy over all the validation tasks.
            # If we instead used `+= loss_i`, then loss_i would become a subloss
            # of `total_valid_loss`, since they have different names.
            # TODO: Explain this in more detail somewhere else.
            logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}")

        logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")

        total_test_loss = Loss("knn/test")
        for i, dataloader in enumerate(test_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)

            # TODO Should we set the number of classes to be the number of
            # classes in the current task?

            # NOTE(review): the arguments of `evaluate(` were truncated;
            # presumably the statement merging `loss_i` into `total_test_loss`
            # is missing here too.
            loss_i = evaluate(
            logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}")

        # Put the model back on the task it was on before the evaluation.
        if task_labels_at_test_time:
            model.on_task_switch(starting_task_id, training=False)

        logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
        return total_valid_loss, total_test_loss
Exemplo n.º 15
 def log(self, loss_object: Loss):
     if self.trainer.logger: