class StreamForgetting(GenericStreamForgetting):
    """
    The StreamForgetting metric, describing the average evaluation accuracy
    loss detected over all experiences observed during training.

    This plugin metric, computed over all observed experiences during
    training, is the average over the difference between the accuracy result
    obtained after first training on an experience and the accuracy result
    obtained on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """

    def __init__(self):
        """
        Creates an instance of the StreamForgetting metric.
        """
        super().__init__()

        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "StreamForgetting"
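# The docstring above reduces stream forgetting to a difference of accuracy
# values. A minimal sketch of that arithmetic follows; it is illustration
# only (plain Python with made-up numbers), not part of the Avalanche plugin.

# accuracy on each experience measured right after training on it
# (the "initial" value stored by the metric)
initial_acc = {0: 0.95, 1: 0.92}
# accuracy on the same experiences measured during a later eval phase
last_acc = {0: 0.70, 1: 0.88}

# per-experience forgetting
exp_forgetting = {e: initial_acc[e] - last_acc[e] for e in initial_acc}
# roughly {0: 0.25, 1: 0.04}

# StreamForgetting reports the mean of the per-experience differences
stream_forgetting = sum(exp_forgetting.values()) / len(exp_forgetting)
# roughly 0.145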
class TaskForgetting(GenericTaskForgetting):
    """
    The Task Forgetting metric returns the amount of forgetting on each task
    separately. The task-wise forgetting is computed as the difference
    between the average accuracy when last training on the task and the
    average accuracy when last evaluating on the same task.
    """

    def __init__(self):
        super().__init__()
        self._current_train_metric = Accuracy()
        self._current_eval_metric = Accuracy()

    def metric_update(self, strategy, train):
        # task labels defined for each experience
        task_labels = strategy.experience.task_labels
        if len(task_labels) > 1:
            # task labels defined for each pattern
            task_labels = strategy.mb_task_id
        else:
            task_labels = task_labels[0]

        if train:
            self._current_train_metric.update(
                strategy.mb_output, strategy.mb_y, task_labels)
        else:
            self._current_eval_metric.update(
                strategy.mb_output, strategy.mb_y, task_labels)

    def __str__(self):
        return "TaskForgetting"
class StreamForwardTransfer(GenericStreamForwardTransfer):
    """
    The Forward Transfer averaged over all the evaluation experiences.

    This plugin metric, computed over all observed experiences during
    training, is the average over the difference between the accuracy result
    obtained after the previous experience and the accuracy result obtained
    on random initialization.
    """

    def __init__(self):
        super().__init__()
        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "StreamForwardTransfer"
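# The same kind of sketch for forward transfer: the reference point is the
# accuracy of the randomly initialized model rather than the accuracy right
# after training. Made-up numbers, illustration only; the per-experience
# variant (ExperienceForwardTransfer) appears later in this section.

# accuracy on each experience with the randomly initialized model
random_init_acc = {1: 0.10, 2: 0.11}
# accuracy on each experience measured after training on the experience
# that precedes it in the stream
prev_exp_acc = {1: 0.35, 2: 0.20}

# per-experience forward transfer
exp_fwt = {e: prev_exp_acc[e] - random_init_acc[e] for e in prev_exp_acc}
# roughly {1: 0.25, 2: 0.09}

# StreamForwardTransfer reports the mean over the evaluation experiences
stream_fwt = sum(exp_fwt.values()) / len(exp_fwt)
# roughly 0.17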
def test_accuracy(self):
    metric = Accuracy()
    self.assertEqual(metric.result(), 0)
    metric.update(self.out, self.y)
    self.assertLessEqual(metric.result(), 1)
    self.assertGreaterEqual(metric.result(), 0)
    metric.reset()
    self.assertEqual(metric.result(), 0)
def test_accuracy_task_per_pattern(self):
    metric = Accuracy()
    self.assertEqual(metric.result(), {})
    metric.update(self.out, self.y, self.task_labels)
    out = metric.result()
    for k, v in out.items():
        self.assertIn(k, self.task_labels.tolist())
        self.assertLessEqual(v, 1)
        self.assertGreaterEqual(v, 0)
    metric.reset()
    self.assertEqual(metric.result(), {})
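# A rough usage sketch of the task-aware path exercised by the test above.
# It assumes the update(predicted_y, true_y, task_labels) signature and the
# per-task dictionary returned by result(); the tensors are invented here
# purely for illustration.
import torch

from avalanche.evaluation.metrics import Accuracy

acc = Accuracy()

predictions = torch.tensor([1, 0, 2, 1])
targets = torch.tensor([1, 0, 2, 2])
task_labels = torch.tensor([0, 0, 0, 1])  # per-pattern task labels

acc.update(predictions, targets, task_labels)
print(acc.result())  # one entry per task, e.g. {0: 1.0, 1: 0.0}

acc.reset()
print(acc.result())  # {}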
def check_experts_accuracy(exml_benchmark):
    """Sanity check. Compute experts accuracy on the train stream."""
    print(
        type(exml_benchmark).__name__,
        "testing expert models on the original train stream",
    )
    for i, model in enumerate(exml_benchmark.expert_models_stream):
        model.to("cuda")
        acc = Accuracy()
        train_data = exml_benchmark.original_benchmark.train_stream[i].dataset
        for x, y, t in DataLoader(
            train_data, batch_size=256, pin_memory=True, num_workers=8
        ):
            x, y, t = x.to("cuda"), y.to("cuda"), t.to("cuda")
            y_pred = model(x)
            acc.update(y_pred, y, t)
        print(f"(i={i}) Original model accuracy: {acc.result()}")
        model.to("cpu")
class ExperienceForwardTransfer(GenericExperienceForwardTransfer):
    """
    The Forward Transfer computed on each experience separately.
    The transfer is computed based on the accuracy metric.
    """

    def __init__(self):
        super().__init__()
        self._current_metric = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

    def metric_update(self, strategy):
        self._current_metric.update(strategy.mb_y, strategy.mb_output, 0)

    def metric_result(self, strategy):
        return self._current_metric.result(0)[0]

    def __str__(self):
        return "ExperienceForwardTransfer"
class StreamForgetting(PluginMetric[Dict[int, float]]):
    """
    The StreamForgetting metric, describing the average evaluation accuracy
    loss detected over all experiences observed during training.

    This plugin metric, computed over all observed experiences during
    training, is the average over the difference between the accuracy result
    obtained after first training on an experience and the accuracy result
    obtained on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """

    def __init__(self):
        """
        Creates an instance of the StreamForgetting metric.
        """
        super().__init__()

        self.stream_forgetting = Mean()
        """
        The average forgetting over all experiences
        """

        self.forgetting = Forgetting()
        """
        The general metric to compute forgetting
        """

        self._current_accuracy = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

        self.eval_exp_id = None
        """
        The current evaluation experience id
        """

        self.train_exp_id = None
        """
        The last encountered training experience id
        """

    def reset(self) -> None:
        """
        Resets the forgetting metrics.

        Beware that this will also reset the initial accuracy of each
        experience!

        :return: None.
        """
        self.forgetting.reset()
        self.stream_forgetting.reset()

    def reset_last_accuracy(self) -> None:
        """
        Resets the last accuracy.

        This will preserve the initial accuracy value of each experience.
        To be used at the beginning of each eval experience.

        :return: None.
        """
        self.forgetting.reset_last()

    def exp_update(self, k, v, initial=False):
        """
        Update forgetting metric.
        See `Forgetting` for more detailed information.

        :param k: key to update
        :param v: value associated to k
        :param initial: update initial value. If False, update last value.
        """
        self.forgetting.update(k, v, initial=initial)

    def exp_result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        Result for experience defined by a key.
        See `Forgetting` documentation for more detailed information.

        :param k: optional key from which to compute forgetting.
        """
        return self.forgetting.result(k=k)

    def result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        The average forgetting over all experiences.

        :param k: optional key from which to compute forgetting.
        """
        return self.stream_forgetting.result()

    def before_training_exp(self, strategy: 'BaseStrategy') -> None:
        self.train_exp_id = strategy.experience.current_experience

    def before_eval(self, strategy) -> None:
        self.reset_last_accuracy()
        self.stream_forgetting.reset()

    def before_eval_exp(self, strategy: 'BaseStrategy') -> None:
        self._current_accuracy.reset()

    def after_eval_iteration(self, strategy: 'BaseStrategy') -> None:
        self.eval_exp_id = strategy.experience.current_experience
        self._current_accuracy.update(strategy.mb_y, strategy.logits)

    def after_eval_exp(self, strategy: 'BaseStrategy') -> None:
        # update experience on which training just ended
        if self.train_exp_id == self.eval_exp_id:
            self.exp_update(self.eval_exp_id,
                            self._current_accuracy.result(), initial=True)
        else:
            # update other experiences
            # if experience has not been encountered in training
            # its value will not be considered in forgetting
            self.exp_update(self.eval_exp_id,
                            self._current_accuracy.result())

        # check if the evaluation experience has already been encountered
        # at training time before the last training.
        # If not, its forgetting should not be accumulated.
        if self.exp_result(k=self.eval_exp_id) is not None:
            exp_forgetting = self.exp_result(k=self.eval_exp_id)
            self.stream_forgetting.update(exp_forgetting, weight=1)

    def after_eval(self, strategy: 'BaseStrategy') -> 'MetricResult':
        return self._package_result(strategy)

    def _package_result(self, strategy: 'BaseStrategy') -> MetricResult:
        metric_value = self.result()

        phase_name, _ = phase_and_task(strategy)
        stream = stream_type(strategy.experience)
        metric_name = '{}/{}_phase/{}_stream' \
            .format(str(self), phase_name, stream)
        plot_x_position = self.get_global_counter()

        return [MetricValue(self, metric_name, metric_value,
                            plot_x_position)]

    def __str__(self):
        return "StreamForgetting"
# ConfusionMatrix,  # Confusion Matrix
# CPUUsage,  # CPU Usage
# DiskUsage,  # Disk Usage
# MaxGPU,  # Max GPU Usage
# MAC,  # Multiply and Accumulate
# MaxRAM,  # Max RAM Usage
# ElapsedTime  # Timing metrics

# create an instance of the standalone Accuracy metric
# initial accuracy is 0
acc_metric = Accuracy()
print("Initial Accuracy: ", acc_metric.result())  # output 0

# the update method keeps a running average accuracy
# the result method returns the current average accuracy
real_y = torch.tensor([1, 2]).long()
predicted_y = torch.tensor([1, 0]).float()
acc_metric.update(real_y, predicted_y)
acc = acc_metric.result()
print("Average Accuracy: ", acc)  # output 0.5

# you can continue to update the metric with new values
predicted_y = torch.tensor([1, 2]).float()
acc_metric.update(real_y, predicted_y)
acc = acc_metric.result()
print("Average Accuracy: ", acc)  # output 0.75

# reset accuracy to 0
acc_metric.reset()
print("After reset: ", acc_metric.result())  # output 0
class ExperienceForgetting(PluginMetric[Dict[int, float]]):
    """
    The ExperienceForgetting metric, describing the accuracy loss
    detected for a certain experience.

    This plugin metric, computed separately for each experience,
    is the difference between the accuracy result obtained after
    first training on an experience and the accuracy result obtained
    on the same experience at the end of successive experiences.

    This metric is computed during the eval phase only.
    """

    def __init__(self):
        """
        Creates an instance of the ExperienceForgetting metric.
        """
        super().__init__()

        self.forgetting = Forgetting()
        """
        The general metric to compute forgetting
        """

        self._last_accuracy = Accuracy()
        """
        The average accuracy over the current evaluation experience
        """

        self.eval_exp_id = None
        """
        The current evaluation experience id
        """

        self.train_exp_id = None
        """
        The last encountered training experience id
        """

    def reset(self) -> None:
        """
        Resets the metric.

        Beware that this will also reset the initial accuracy of each
        experience!

        :return: None.
        """
        self.forgetting.reset()

    def reset_last_accuracy(self) -> None:
        """
        Resets the last accuracy.

        This will preserve the initial accuracy value of each experience.
        To be used at the beginning of each eval experience.

        :return: None.
        """
        self.forgetting.reset_last()

    def update(self, k, v, initial=False):
        """
        Update forgetting metric.
        See `Forgetting` for more detailed information.

        :param k: key to update
        :param v: value associated to k
        :param initial: update initial value. If False, update last value.
        """
        self.forgetting.update(k, v, initial=initial)

    def result(self, k=None) -> Union[float, None, Dict[int, float]]:
        """
        See `Forgetting` documentation for more detailed information.

        :param k: optional key from which to compute forgetting.
        """
        return self.forgetting.result(k=k)

    def before_training_exp(self, strategy: 'BaseStrategy') -> None:
        self.train_exp_id = strategy.experience.current_experience

    def before_eval(self, strategy) -> None:
        self.reset_last_accuracy()

    def before_eval_exp(self, strategy: 'BaseStrategy') -> None:
        self._last_accuracy.reset()

    def after_eval_iteration(self, strategy: 'BaseStrategy') -> None:
        self.eval_exp_id = strategy.experience.current_experience
        self._last_accuracy.update(strategy.mb_y, strategy.logits)

    def after_eval_exp(self, strategy: 'BaseStrategy') -> MetricResult:
        # update experience on which training just ended
        if self.train_exp_id == self.eval_exp_id:
            self.update(self.eval_exp_id,
                        self._last_accuracy.result(), initial=True)
        else:
            # update other experiences
            # if experience has not been encountered in training
            # its value will not be considered in forgetting
            self.update(self.eval_exp_id, self._last_accuracy.result())

        # check if the evaluation experience has already been encountered
        # at training time before the last training.
        # If not, forgetting should not be returned.
        if self.result(k=self.eval_exp_id) is not None:
            return self._package_result(strategy)

    def _package_result(self, strategy: 'BaseStrategy') -> MetricResult:
        forgetting = self.result(k=self.eval_exp_id)
        metric_name = get_metric_name(self, strategy, add_experience=True)
        plot_x_position = self._next_x_position(metric_name)

        metric_values = [
            MetricValue(self, metric_name, forgetting, plot_x_position)
        ]
        return metric_values

    def __str__(self):
        return "ExperienceForgetting"
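# These plugin classes are usually not instantiated by hand. A sketch of the
# typical wiring follows, assuming the accuracy_metrics/forgetting_metrics
# helpers, EvaluationPlugin and InteractiveLogger that Avalanche exposes
# alongside these classes (none of which appear in this excerpt); treat it
# as an outline rather than a verified recipe.
from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics
from avalanche.logging import InteractiveLogger
from avalanche.training.plugins import EvaluationPlugin

eval_plugin = EvaluationPlugin(
    accuracy_metrics(experience=True, stream=True),
    forgetting_metrics(experience=True, stream=True),
    loggers=[InteractiveLogger()],
)
# eval_plugin is then passed to a strategy as its evaluator, and the
# experience- and stream-level forgetting values are emitted at eval time.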
def test_standalone_accuracy(self):
    uut = Accuracy()

    # Initial accuracy should be 0
    self.assertEqual(0.0, uut.result())

    truth = torch.as_tensor([0, 5, 2, 1, 0])
    predicted = torch.as_tensor([2, 3, 2, 5, 0])  # correct 2/5 = 40%
    uut.update(truth, predicted)
    self.assertEqual(0.4, uut.result())

    truth = torch.as_tensor([0, 3, 2, 1, 0])
    predicted = torch.as_tensor([2, 3, 2, 5, 0])  # correct 3/5 = 60%
    uut.update(truth, predicted)
    self.assertEqual(0.5, uut.result())

    # After-reset accuracy should be 0
    uut.reset()
    self.assertEqual(0.0, uut.result())

    # Check if handles 0 accuracy
    truth = torch.as_tensor([0, 0, 0, 0])
    predicted = torch.as_tensor([1, 1, 1, 1])  # correct 0/4 = 0%
    uut.update(truth, predicted)
    self.assertEqual(0.0, uut.result())

    # Should throw exception when len(truth) != len(predicted)
    with self.assertRaises(ValueError):
        truth = torch.as_tensor([0, 0, 1, 0])
        predicted = torch.as_tensor([1, 1, 1])
        uut.update(truth, predicted)

    # Check accuracy didn't change after error
    self.assertEqual(0.0, uut.result())

    # Test logits / one-hot support
    uut.reset()

    # Test one-hot (truth)
    truth = torch.as_tensor([[0, 1], [1, 0], [1, 0], [1, 0]])
    predicted = torch.as_tensor([1, 1, 1, 1])  # correct 1/4 = 25%
    uut.update(truth, predicted)
    self.assertEqual(0.25, uut.result())

    # Test logits (predictions)
    truth = torch.as_tensor([1, 1, 0, 0])
    predicted = torch.as_tensor([[0.73, 0.1], [0.22, 0.33],
                                 [0.99, 0.01], [0.12, 0.11]])
    # correct 3/4 = 75%
    uut.update(truth, predicted)
    self.assertEqual(0.5, uut.result())

    # Test one-hot (truth) + logits (predictions)
    truth = torch.as_tensor([[1, 0], [1, 0], [0, 1], [0, 1]])
    predicted = torch.as_tensor([[0.73, 0.1], [0.22, 0.33],
                                 [0.99, 0.01], [0.12, 0.11]])
    # correct 1/4 = 25%
    uut.update(truth, predicted)
    self.assertEqual(5.0 / 12.0, uut.result())
from torch.utils.data import DataLoader

from avalanche.evaluation.metrics import Accuracy

if __name__ == "__main__":
    # ExML scenarios provide a stream of pretrained models
    exml_benchmark = ExMLMNIST(scenario="split")

    print(
        type(exml_benchmark).__name__,
        "testing expert models on the original train stream",
    )
    for i, model in enumerate(exml_benchmark.expert_models_stream):
        # Each model is trained on a separate experience of the training
        # stream. Here we simply check the accuracy on the training
        # experience for each expert model.
        # Notice that most models have a very high (train) accuracy because
        # they overfitted their own experience.
        model.to("cuda")
        acc = Accuracy()
        train_data = exml_benchmark.original_benchmark.train_stream[i].dataset
        for x, y, t in DataLoader(
            train_data, batch_size=256, pin_memory=True, num_workers=8
        ):
            x, y, t = x.to("cuda"), y.to("cuda"), t.to("cuda")
            y_pred = model(x)
            acc.update(y_pred, y, t)
        print(f"(i={i}) Original model accuracy: {acc.result()}")
        model.to("cpu")