Example #1
class StreamForgetting(GenericStreamForgetting):
    """
    The StreamForgetting metric, describing the average evaluation accuracy loss
    detected over all experiences observed during training.

    This plugin metric, computed over all observed experiences during training,
    is the average over the difference between the accuracy result obtained
    after first training on an experience and the accuracy result obtained
    on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """
    def __init__(self):
        """
        Creates an instance of the StreamForgetting metric.
        """

        super().__init__()

        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "StreamForgetting"
Example #2
class TaskForgetting(GenericTaskForgetting):
    """
    The Task Forgetting metric returns the amount of forgetting
    on each task separately. The task-wise forgetting is computed
    as the difference between the average accuracy when last training
    on the task and the average accuracy when last evaluating on the same task.

    """
    def __init__(self):
        super().__init__()
        self._current_train_metric = Accuracy()
        self._current_eval_metric = Accuracy()

    def metric_update(self, strategy, train):
        # task labels defined for each experience
        task_labels = strategy.experience.task_labels
        if len(task_labels) > 1:
            # task labels defined for each pattern
            task_labels = strategy.mb_task_id
        else:
            task_labels = task_labels[0]
        if train:
            self._current_train_metric.update(strategy.mb_output,
                                              strategy.mb_y, task_labels)
        else:
            self._current_eval_metric.update(strategy.mb_output, strategy.mb_y,
                                             task_labels)

    def __str__(self):
        return "TaskForgetting"
Example #3
class StreamForwardTransfer(GenericStreamForwardTransfer):
    """
    The Forward Transfer averaged over all the evaluation experiences.

    This plugin metric, computed over all observed experiences during training,
    is the average over the difference between the accuracy result obtained
    after the previous experience and the accuracy result obtained
    on random initialization.
    """

    def __init__(self):
        super().__init__()
        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "StreamForwardTransfer"
Example #4
    def test_accuracy(self):
        metric = Accuracy()
        self.assertEqual(metric.result(), 0)
        metric.update(self.out, self.y)
        self.assertLessEqual(metric.result(), 1)
        self.assertGreaterEqual(metric.result(), 0)
        metric.reset()
        self.assertEqual(metric.result(), 0)
Example #5
    def test_accuracy_task_per_pattern(self):
        metric = Accuracy()
        self.assertEqual(metric.result(), {})
        metric.update(self.out, self.y, self.task_labels)
        out = metric.result()
        for k, v in out.items():
            self.assertIn(k, self.task_labels.tolist())
            self.assertLessEqual(v, 1)
            self.assertGreaterEqual(v, 0)
        metric.reset()
        self.assertEqual(metric.result(), {})
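
The test above exercises the task-aware path: when task labels are passed to update, result returns one running average per task label. A small self-contained sketch following the same call pattern (the exact update signature differs across Avalanche versions, so treat this as illustrative):

import torch
from avalanche.evaluation.metrics import Accuracy

acc = Accuracy()
out = torch.tensor([[0.9, 0.1],   # predicted class 0
                    [0.2, 0.8],   # predicted class 1
                    [0.6, 0.4],   # predicted class 0
                    [0.3, 0.7]])  # predicted class 1
y = torch.tensor([0, 1, 1, 1])
task_labels = torch.tensor([0, 0, 1, 1])

acc.update(out, y, task_labels)
# one running average per task label, e.g. {0: 1.0, 1: 0.5}
print(acc.result())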
Example #6
def check_experts_accuracy(exml_benchmark):
    """Sanity check. Compute experts accuracy on the train stream."""
    print(
        type(exml_benchmark).__name__,
        "testing expert models on the original train stream",
    )
    for i, model in enumerate(exml_benchmark.expert_models_stream):
        model.to("cuda")
        acc = Accuracy()

        train_data = exml_benchmark.original_benchmark.train_stream[i].dataset
        for x, y, t in DataLoader(train_data,
                                  batch_size=256,
                                  pin_memory=True,
                                  num_workers=8):
            x, y, t = x.to("cuda"), y.to("cuda"), t.to("cuda")
            y_pred = model(x)
            acc.update(y_pred, y, t)
        print(f"(i={i}) Original model accuracy: {acc.result()}")
        model.to("cpu")
Example #7
class ExperienceForwardTransfer(GenericExperienceForwardTransfer):
    """
    The Forward Transfer computed on each experience separately.
    The transfer is computed based on the accuracy metric.
    """
    def __init__(self):
        super().__init__()

        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "ExperienceForwardTransfer"
Example #8
class StreamForgetting(PluginMetric[Dict[int, float]]):
    """
    The StreamForgetting metric, describing the average evaluation accuracy loss
    detected over all experiences observed during training.

    This plugin metric, computed over all observed experiences during training,
    is the average over the difference between the accuracy result obtained
    after first training on an experience and the accuracy result obtained
    on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """
    def __init__(self):
        """
        Creates an instance of the StreamForgetting metric.
        """

        super().__init__()

        self.stream_forgetting = Mean()
        """
        The average forgetting over all experiences
        """

        self.forgetting = Forgetting()
        """
        The general metric to compute forgetting
        """

        self._current_accuracy = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

        self.eval_exp_id = None
        """
        The current evaluation experience id
        """

        self.train_exp_id = None
        """
        The last encountered training experience id
        """

    def reset(self) -> None:
        """
        Resets the forgetting metrics.

        Beware that this will also reset the initial accuracy of each
        experience!

        :return: None.
        """
        self.forgetting.reset()
        self.stream_forgetting.reset()

    def reset_last_accuracy(self) -> None:
        """
        Resets the last accuracy.

        This will preserve the initial accuracy value of each experience.
        To be used at the beginning of each eval experience.

        :return: None.
        """
        self.forgetting.reset_last()

    def exp_update(self, k, v, initial=False):
        """
        Update forgetting metric.
        See `Forgetting` for more detailed information.

        :param k: key to update
        :param v: value associated to k
        :param initial: update initial value. If False, update
            last value.
        """
        self.forgetting.update(k, v, initial=initial)

    def exp_result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        Result for experience defined by a key.
        See `Forgetting` documentation for more detailed information.

        k: optional key from which to compute forgetting.
        """
        return self.forgetting.result(k=k)

    def result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        The average forgetting over all experiences.

        k: optional key from which to compute forgetting.
        """
        return self.stream_forgetting.result()

    def before_training_exp(self, strategy: 'BaseStrategy') -> None:
        self.train_exp_id = strategy.experience.current_experience

    def before_eval(self, strategy) -> None:
        self.reset_last_accuracy()
        self.stream_forgetting.reset()

    def before_eval_exp(self, strategy: 'BaseStrategy') -> None:
        self._current_accuracy.reset()

    def after_eval_iteration(self, strategy: 'BaseStrategy') -> None:
        self.eval_exp_id = strategy.experience.current_experience
        self._current_accuracy.update(strategy.mb_y, strategy.logits)

    def after_eval_exp(self, strategy: 'BaseStrategy') -> None:
        # update experience on which training just ended
        if self.train_exp_id == self.eval_exp_id:
            self.exp_update(self.eval_exp_id,
                            self._current_accuracy.result(),
                            initial=True)
        else:
            # update other experiences
            # if experience has not been encountered in training
            # its value will not be considered in forgetting
            self.exp_update(self.eval_exp_id, self._current_accuracy.result())

        # this checks if the evaluation experience has been
        # already encountered at training time
        # before the last training.
        # If not, forgetting should not be returned.
        if self.exp_result(k=self.eval_exp_id) is not None:
            exp_forgetting = self.exp_result(k=self.eval_exp_id)
            self.stream_forgetting.update(exp_forgetting, weight=1)

    def after_eval(self, strategy: 'BaseStrategy') -> \
            'MetricResult':
        return self._package_result(strategy)

    def _package_result(self, strategy: 'BaseStrategy') -> \
            MetricResult:
        metric_value = self.result()

        phase_name, _ = phase_and_task(strategy)
        stream = stream_type(strategy.experience)
        metric_name = '{}/{}_phase/{}_stream' \
            .format(str(self),
                    phase_name,
                    stream)
        plot_x_position = self.get_global_counter()

        return [MetricValue(self, metric_name, metric_value, plot_x_position)]

    def __str__(self):
        return "StreamForgetting"
Example #9
# ConfusionMatrix, # Confusion Matrix
# CPUUsage, # CPU Usage
# DiskUsage, # Disk Usage
# MaxGPU, # Max GPU Usage
# MAC,  # Multiply and Accumulate
# MaxRAM, # Max RAM Usage
# ElapsedTime # Timing metrics

import torch

from avalanche.evaluation.metrics import Accuracy

# create an instance of the standalone Accuracy metric
# initial accuracy is 0
acc_metric = Accuracy()
print("Initial Accuracy: ", acc_metric.result())  # output 0

# the update method keeps a running average accuracy
# the result method returns the current average accuracy
real_y = torch.tensor([1, 2]).long()
predicted_y = torch.tensor([1, 0]).float()
acc_metric.update(real_y, predicted_y)
acc = acc_metric.result()
print("Average Accuracy: ", acc)  # output 0.5

# you can continue to update the metric with new values
predicted_y = torch.tensor([1, 2]).float()
acc_metric.update(real_y, predicted_y)
acc = acc_metric.result()
print("Average Accuracy: ", acc)  # output 0.75

# reset accuracy to 0
acc_metric.reset()
print("After reset: ", acc_metric.result())  # output 0
Example #10
class ExperienceForgetting(PluginMetric[Dict[int, float]]):
    """
    The ExperienceForgetting metric, describing the accuracy loss
    detected for a certain experience.

    This plugin metric, computed separately for each experience,
    is the difference between the accuracy result obtained after
    first training on an experience and the accuracy result obtained
    on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """
    def __init__(self):
        """
        Creates an instance of the ExperienceForgetting metric.
        """

        super().__init__()

        self.forgetting = Forgetting()
        """
        The general metric to compute forgetting
        """

        self._last_accuracy = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

        self.eval_exp_id = None
        """
        The current evaluation experience id
        """

        self.train_exp_id = None
        """
        The last encountered training experience id
        """

    def reset(self) -> None:
        """
        Resets the metric.

        Beware that this will also reset the initial accuracy of each
        experience!

        :return: None.
        """
        self.forgetting.reset()

    def reset_last_accuracy(self) -> None:
        """
        Resets the last accuracy.

        This will preserve the initial accuracy value of each experience.
        To be used at the beginning of each eval experience.

        :return: None.
        """
        self.forgetting.reset_last()

    def update(self, k, v, initial=False):
        """
        Update forgetting metric.
        See `Forgetting` for more detailed information.

        :param k: key to update
        :param v: value associated to k
        :param initial: update initial value. If False, update
            last value.
        """
        self.forgetting.update(k, v, initial=initial)

    def result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        See `Forgetting` documentation for more detailed information.

        k: optional key from which to compute forgetting.
        """
        return self.forgetting.result(k=k)

    def before_training_exp(self, strategy: 'BaseStrategy') -> None:
        self.train_exp_id = strategy.experience.current_experience

    def before_eval(self, strategy) -> None:
        self.reset_last_accuracy()

    def before_eval_exp(self, strategy: 'BaseStrategy') -> None:
        self._last_accuracy.reset()

    def after_eval_iteration(self, strategy: 'BaseStrategy') -> None:
        self.eval_exp_id = strategy.experience.current_experience
        self._last_accuracy.update(strategy.mb_y, strategy.logits)

    def after_eval_exp(self, strategy: 'BaseStrategy') \
            -> MetricResult:
        # update experience on which training just ended
        if self.train_exp_id == self.eval_exp_id:
            self.update(self.eval_exp_id,
                        self._last_accuracy.result(),
                        initial=True)
        else:
            # update other experiences
            # if experience has not been encountered in training
            # its value will not be considered in forgetting
            self.update(self.eval_exp_id, self._last_accuracy.result())

        # this checks if the evaluation experience has been
        # already encountered at training time
        # before the last training.
        # If not, forgetting should not be returned.
        if self.result(k=self.eval_exp_id) is not None:
            return self._package_result(strategy)

    def _package_result(self, strategy: 'BaseStrategy') \
            -> MetricResult:

        forgetting = self.result(k=self.eval_exp_id)
        metric_name = get_metric_name(self, strategy, add_experience=True)
        plot_x_position = self._next_x_position(metric_name)

        metric_values = [
            MetricValue(self, metric_name, forgetting, plot_x_position)
        ]
        return metric_values

    def __str__(self):
        return "ExperienceForgetting"
Example #11
    def test_standalone_accuracy(self):
        uut = Accuracy()

        # Initial accuracy should be 0
        self.assertEqual(0.0, uut.result())

        truth = torch.as_tensor([0, 5, 2, 1, 0])
        predicted = torch.as_tensor([2, 3, 2, 5, 0])  # correct 2/5 = 40%
        uut.update(truth, predicted)

        self.assertEqual(0.4, uut.result())

        truth = torch.as_tensor([0, 3, 2, 1, 0])
        predicted = torch.as_tensor([2, 3, 2, 5, 0])  # correct 3/5 = 60%
        uut.update(truth, predicted)

        self.assertEqual(0.5, uut.result())

        # After-reset accuracy should be 0
        uut.reset()
        self.assertEqual(0.0, uut.result())

        # Check if handles 0 accuracy
        truth = torch.as_tensor([0, 0, 0, 0])
        predicted = torch.as_tensor([1, 1, 1, 1])  # correct 0/4 = 0%
        uut.update(truth, predicted)

        self.assertEqual(0.0, uut.result())

        # Should throw exception when len(truth) != len(predicted)
        with self.assertRaises(ValueError):
            truth = torch.as_tensor([0, 0, 1, 0])
            predicted = torch.as_tensor([1, 1, 1])
            uut.update(truth, predicted)

        # Check accuracy didn't change after error
        self.assertEqual(0.0, uut.result())

        # Test logits / one-hot support
        uut.reset()

        # Test one-hot (truth)
        truth = torch.as_tensor([[0, 1], [1, 0], [1, 0], [1, 0]])
        predicted = torch.as_tensor([1, 1, 1, 1])  # correct 1/4 = 25%
        uut.update(truth, predicted)
        self.assertEqual(0.25, uut.result())

        # Test logits (predictions)
        truth = torch.as_tensor([1, 1, 0, 0])
        predicted = torch.as_tensor([[0.73, 0.1], [0.22, 0.33], [0.99, 0.01],
                                     [0.12, 0.11]])  # correct 3/4 = 75%
        uut.update(truth, predicted)
        self.assertEqual(0.5, uut.result())

        # Test one-hot (truth) + logits (predictions)
        truth = torch.as_tensor([[1, 0], [1, 0], [0, 1], [0, 1]])
        predicted = torch.as_tensor([[0.73, 0.1], [0.22, 0.33], [0.99, 0.01],
                                     [0.12, 0.11]])  # correct 1/4 = 25%
        uut.update(truth, predicted)
        self.assertEqual(5.0 / 12.0, uut.result())
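
The final assertion follows from the same weighted running average: after the reset the metric has seen 1 + 3 + 1 = 5 correct predictions over 4 + 4 + 4 = 12 patterns, hence 5.0 / 12.0.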
Example #12
from torch.utils.data import DataLoader

from avalanche.evaluation.metrics import Accuracy

if __name__ == "__main__":
    # ExML scenarios provide a stream of pretrained models
    exml_benchmark = ExMLMNIST(scenario="split")

    print(
        type(exml_benchmark).__name__,
        "testing expert models on the original train stream",
    )
    for i, model in enumerate(exml_benchmark.expert_models_stream):
        # Each model is trained on a separate experience of the training stream.
        # Here we simply check the accuracy on the training experience
        # for each expert model.
        # Notice that most models have a very high (train) accuracy because they
        # overfitted their own experience.

        model.to("cuda")
        acc = Accuracy()

        train_data = exml_benchmark.original_benchmark.train_stream[i].dataset
        for x, y, t in DataLoader(train_data,
                                  batch_size=256,
                                  pin_memory=True,
                                  num_workers=8):
            x, y, t = x.to("cuda"), y.to("cuda"), t.to("cuda")
            y_pred = model(x)
            acc.update(y_pred, y, t)
        print(f"(i={i}) Original model accuracy: {acc.result()}")
        model.to("cpu")