def test_binary_wrong_inputs():
    pr = Precision()

    with pytest.raises(ValueError):
        # y contains values other than 0 and 1
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor), torch.arange(0, 10).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # y_pred values are not thresholded to 0, 1 values
        pr.update((torch.rand(10, 1), torch.randint(0, 2, size=(10,)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor), torch.randint(0, 2, size=(10, 5)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10, 5, 6)).type(torch.LongTensor), torch.randint(0, 2, size=(10,)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor), torch.randint(0, 2, size=(10, 5, 6)).type(torch.LongTensor)))
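# For contrast, a hedged companion sketch (not part of the original suite): a
# binary update succeeds when y_pred and y both hold thresholded 0/1 values of
# compatible shape.
def test_binary_valid_input_sketch():
    pr = Precision()
    pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor), torch.randint(0, 2, size=(10,)).type(torch.LongTensor)))
    assert pr._type == "binary"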
def test_binary_input(average):
    pr = Precision(average=average)
    assert pr._updated is False

    def _test(y_pred, y, batch_size):
        pr.reset()
        assert pr._updated is False

        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()

        assert pr._type == "binary"
        assert pr._updated is True
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        assert precision_score(np_y, np_y_pred, average="binary") == pytest.approx(pr_compute)

    def get_test_cases():
        test_cases = [
            # Binary input of shape (N,) or (N, 1)
            (torch.randint(0, 2, size=(10,)), torch.randint(0, 2, size=(10,)), 1),
            (torch.randint(0, 2, size=(10, 1)), torch.randint(0, 2, size=(10, 1)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50,)), torch.randint(0, 2, size=(50,)), 16),
            (torch.randint(0, 2, size=(50, 1)), torch.randint(0, 2, size=(50, 1)), 16),
            # Binary input of shape (N, L)
            (torch.randint(0, 2, size=(10, 5)), torch.randint(0, 2, size=(10, 5)), 1),
            (torch.randint(0, 2, size=(10, 1, 5)), torch.randint(0, 2, size=(10, 1, 5)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5)), torch.randint(0, 2, size=(50, 5)), 16),
            (torch.randint(0, 2, size=(50, 1, 5)), torch.randint(0, 2, size=(50, 1, 5)), 16),
            # Binary input of shape (N, H, W)
            (torch.randint(0, 2, size=(10, 12, 10)), torch.randint(0, 2, size=(10, 12, 10)), 1),
            (torch.randint(0, 2, size=(10, 1, 12, 10)), torch.randint(0, 2, size=(10, 1, 12, 10)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 12, 10)), torch.randint(0, 2, size=(50, 12, 10)), 16),
            (torch.randint(0, 2, size=(50, 1, 12, 10)), torch.randint(0, 2, size=(50, 1, 12, 10)), 16),
            # Corner case with all-zero predictions
            (torch.zeros(size=(10,)), torch.randint(0, 2, size=(10,)), 1),
            (torch.zeros(size=(10, 1)), torch.randint(0, 2, size=(10, 1)), 1),
        ]
        return test_cases

    for _ in range(5):
        # check multiple random inputs, as exact random matches are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
def test_multilabel_input(average):
    pr = Precision(average=average, is_multilabel=True)

    def _test(y_pred, y, batch_size):
        pr.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)

        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        pr1 = Precision(is_multilabel=True, average=True)
        pr2 = Precision(is_multilabel=True, average=False)
        pr1.update((y_pred, y))
        pr2.update((y_pred, y))
        assert pr1.compute() == pytest.approx(pr2.compute().mean().item())

    def get_test_cases():
        test_cases = [
            # Multilabel input data of shape (N, C)
            (torch.randint(0, 2, size=(10, 5)), torch.randint(0, 2, size=(10, 5)), 1),
            (torch.randint(0, 2, size=(10, 4)), torch.randint(0, 2, size=(10, 4)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5)), torch.randint(0, 2, size=(50, 5)), 16),
            (torch.randint(0, 2, size=(50, 4)), torch.randint(0, 2, size=(50, 4)), 16),
            # Multilabel input data of shape (N, C, L)
            (torch.randint(0, 2, size=(10, 5, 10)), torch.randint(0, 2, size=(10, 5, 10)), 1),
            (torch.randint(0, 2, size=(10, 4, 10)), torch.randint(0, 2, size=(10, 4, 10)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5, 10)), torch.randint(0, 2, size=(50, 5, 10)), 16),
            (torch.randint(0, 2, size=(50, 4, 10)), torch.randint(0, 2, size=(50, 4, 10)), 16),
            # Multilabel input data of shape (N, C, H, W, ...)
            (torch.randint(0, 2, size=(10, 5, 18, 16)), torch.randint(0, 2, size=(10, 5, 18, 16)), 1),
            (torch.randint(0, 2, size=(10, 4, 20, 23)), torch.randint(0, 2, size=(10, 4, 20, 23)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5, 18, 16)), torch.randint(0, 2, size=(50, 5, 18, 16)), 16),
            (torch.randint(0, 2, size=(50, 4, 20, 23)), torch.randint(0, 2, size=(50, 4, 20, 23)), 16),
        ]
        return test_cases

    for _ in range(5):
        # check multiple random inputs, as exact random matches are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
def _test(average):
    pr = Precision(average=average)

    y_pred = torch.randint(0, 2, size=(10, 4)).float()
    y = torch.randint(4, 5, size=(10,)).long()
    with pytest.raises(ValueError):
        # y contains label 4, which is out of range for 4-class y_pred
        pr.update((y_pred, y))
def test_compute_average():
    precision = Precision(average=True)

    y_pred = torch.eye(4)
    y = torch.ones(4).type(torch.LongTensor)
    precision.update((y_pred, y))

    assert isinstance(precision.compute(), float)
    assert precision.compute() == 0.25
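# A hedged companion sketch (not part of the original suite) spelling out why
# test_compute_average() expects 0.25: with y_pred = torch.eye(4) the argmax
# predictions are [0, 1, 2, 3] while y = [1, 1, 1, 1], so the per-class
# precisions are [0, 1, 0, 0] and their mean is 1 / 4 = 0.25.
def test_compute_average_breakdown_sketch():
    precision = Precision(average=False)
    precision.update((torch.eye(4), torch.ones(4).type(torch.LongTensor)))
    assert precision.compute().tolist() == [0.0, 1.0, 0.0, 0.0]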
def _test(average):
    pr = Precision(average=average)

    y_pred = torch.randint(0, 2, size=(10, 12, 10))
    y = torch.randint(0, 2, size=(10, 12, 10)).type(torch.LongTensor)
    pr.update((y_pred, y))

    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert pr._type == 'binary'
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()
    assert precision_score(np_y, np_y_pred, average='binary') == pytest.approx(pr_compute)

    pr.reset()
    y_pred = torch.randint(0, 2, size=(10, 1, 12, 10))
    y = torch.randint(0, 2, size=(10, 1, 12, 10)).type(torch.LongTensor)
    pr.update((y_pred, y))

    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert pr._type == 'binary'
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()
    assert precision_score(np_y, np_y_pred, average='binary') == pytest.approx(pr_compute)

    # Batched Updates
    pr = Precision(average=average)
    pr.reset()
    y_pred = torch.randint(0, 2, size=(100, 12, 10))
    y = torch.randint(0, 2, size=(100, 12, 10)).type(torch.LongTensor)  # same shape as y_pred

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for i in range(n_iters):
        idx = i * batch_size
        pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    np_y = y.numpy().ravel()
    np_y_pred = y_pred.numpy().ravel()
    assert pr._type == 'binary'
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()
    assert precision_score(np_y, np_y_pred, average='binary') == pytest.approx(pr_compute)
def metrics_estimating(tester, criterion):
    Accuracy(output_transform=thresholded_output_transform).attach(tester, 'accuracy')
    Recall(output_transform=thresholded_output_transform, average=True).attach(tester, 'recall')
    Precision(output_transform=thresholded_output_transform, average=True).attach(tester, 'precision')
    Loss(criterion).attach(tester, 'loss')
def test_integration():
    def _test(p, r, average, output_transform):
        np.random.seed(1)

        n_iters = 10
        batch_size = 10
        n_classes = 10

        y_true = np.arange(0, n_iters * batch_size) % n_classes
        y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
        for i in range(n_iters * batch_size):
            if np.random.rand() > 0.4:
                y_pred[i, y_true[i]] = 1.0
            else:
                j = np.random.randint(0, n_classes)
                y_pred[i, j] = 0.7

        y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
        y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

        def update_fn(engine, batch):
            y_true_batch = next(y_true_batch_values)
            y_pred_batch = next(y_pred_batch_values)
            if output_transform is not None:
                return {'y_pred': torch.from_numpy(y_pred_batch), 'y': torch.from_numpy(y_true_batch)}
            return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

        evaluator = Engine(update_fn)

        f2 = Fbeta(beta=2.0, average=average, precision=p, recall=r, output_transform=output_transform)
        f2.attach(evaluator, "f2")

        data = list(range(n_iters))
        state = evaluator.run(data, max_epochs=1)

        f2_true = fbeta_score(y_true, np.argmax(y_pred, axis=-1), average='macro' if average else None, beta=2.0)
        if isinstance(state.metrics['f2'], torch.Tensor):
            np.testing.assert_allclose(f2_true, state.metrics['f2'].numpy())
        else:
            assert f2_true == pytest.approx(state.metrics['f2']), "{} vs {}".format(f2_true, state.metrics['f2'])

    _test(None, None, False, output_transform=None)
    _test(None, None, True, output_transform=None)

    def output_transform(output):
        return output['y_pred'], output['y']

    _test(None, None, False, output_transform=output_transform)
    _test(None, None, True, output_transform=output_transform)

    precision = Precision(average=False)
    recall = Recall(average=False)
    _test(precision, recall, False, None)
    _test(precision, recall, True, None)
def folds(self, kf):
    model = BGRU(self.input_size, self.hidden_size, self.num_layers, self.num_classes,
                 self.batch_size, self.dropout)
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)
    train_loader, valid_loader = _get_data_loader(kf, self.batch_size)

    trainer = create_supervised_trainer(model, optimizer, loss, device=DEVICE)
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'acc': CategoricalAccuracy(),
            'loss': Loss(loss),
            'prec': Precision(average=True),
            'recall': Recall(average=True),
        },
        device=DEVICE,
    )

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        iter_num = trainer.state.iteration
        if iter_num % 10 == 0:
            logger.info("Epoch[{}] Iter: {} Loss: {:.2f}".format(
                trainer.state.epoch, iter_num, trainer.state.output))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(trainer):
        evaluator.run(train_loader)
        metrics = evaluator.state.metrics
        f1 = (2 * metrics['prec'] * metrics['recall']) / (metrics['prec'] + metrics['recall'])
        logger.info(
            "Train Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f} "
            "Avg Precision: {:.2f} Avg Recall: {:.2f} Avg F1 Score: {:.2f}".format(
                trainer.state.epoch, metrics['acc'], metrics['loss'],
                metrics['prec'], metrics['recall'], f1))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        evaluator.run(valid_loader)
        metrics = evaluator.state.metrics
        f1 = (2 * metrics['prec'] * metrics['recall']) / (metrics['prec'] + metrics['recall'])
        for k in self.res.keys():
            if k != 'f1':
                self.res[k].append(metrics[k])
            else:
                self.res[k].append(f1)
        logger.info(
            "Valid Results - Epoch: {} Avg accuracy: {:.2f} Avg loss: {:.2f} "
            "Avg Precision: {:.2f} Avg Recall: {:.2f} Avg F1 Score: {:.2f}".format(
                trainer.state.epoch, metrics['acc'], metrics['loss'],
                metrics['prec'], metrics['recall'], f1))

    trainer.run(train_loader, max_epochs=self.num_epochs)
    return model
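# Hedged aside (not from the original code): the f1 logged above is the
# harmonic mean of the already-averaged precision and recall, which in general
# differs from macro-F1 (the mean of per-class F1 scores). A self-contained
# sketch of the per-class alternative, following ignite's documented
# MetricsLambda pattern; the helper name is hypothetical.
def _attach_macro_f1_sketch(evaluator):
    import torch
    from ignite.metrics import MetricsLambda, Precision, Recall

    prec = Precision(average=False)  # per-class precision tensor
    rec = Recall(average=False)      # per-class recall tensor
    per_class_f1 = prec * rec * 2 / (prec + rec + 1e-20)  # elementwise metric arithmetic
    MetricsLambda(lambda t: torch.mean(t).item(), per_class_f1).attach(evaluator, 'f1')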
def test_integration():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size) % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta**2) * p * r / (beta**2 * p + r)).item()

    F1 = MetricsLambda(Fbeta, recall, precision, 1)

    precision.attach(evaluator, "precision")
    recall.attach(evaluator, "recall")
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    precision_true = precision_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    recall_true = recall_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average='macro')

    precision = state.metrics['precision'].numpy()
    recall = state.metrics['recall'].numpy()

    assert precision_true == approx(precision), "{} vs {}".format(precision_true, precision)
    assert recall_true == approx(recall), "{} vs {}".format(recall_true, recall)
    assert f1_true == approx(state.metrics['f1']), "{} vs {}".format(f1_true, state.metrics['f1'])
def test_multiclass_input(average):
    pr = Precision(average=average)

    def _test(y_pred, y, batch_size):
        pr.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        num_classes = y_pred.shape[1]
        np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
        np_y = y.numpy().ravel()

        assert pr._type == "multiclass"
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()

        sk_average_parameter = "macro" if average else None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(pr_compute)

    def get_test_cases():
        test_cases = [
            # Multiclass input: y_pred of shape (N, C), y of shape (N,)
            (torch.rand(10, 6), torch.randint(0, 6, size=(10,)), 1),
            (torch.rand(10, 4), torch.randint(0, 4, size=(10,)), 1),
            # updated batches
            (torch.rand(50, 6), torch.randint(0, 6, size=(50,)), 16),
            (torch.rand(50, 4), torch.randint(0, 4, size=(50,)), 16),
            # Multiclass input: y_pred of shape (N, C, L), y of shape (N, L)
            (torch.rand(10, 5, 8), torch.randint(0, 5, size=(10, 8)), 1),
            (torch.rand(10, 8, 12), torch.randint(0, 8, size=(10, 12)), 1),
            # updated batches
            (torch.rand(50, 5, 8), torch.randint(0, 5, size=(50, 8)), 16),
            (torch.rand(50, 8, 12), torch.randint(0, 8, size=(50, 12)), 16),
            # Multiclass input: y_pred of shape (N, C, H, W, ...), y of shape (N, H, W, ...)
            (torch.rand(10, 5, 18, 16), torch.randint(0, 5, size=(10, 18, 16)), 1),
            (torch.rand(10, 7, 20, 12), torch.randint(0, 7, size=(10, 20, 12)), 1),
            # updated batches
            (torch.rand(50, 5, 18, 16), torch.randint(0, 5, size=(50, 18, 16)), 16),
            (torch.rand(50, 7, 20, 12), torch.randint(0, 7, size=(50, 20, 12)), 16),
        ]
        return test_cases

    for _ in range(5):
        # check multiple random inputs, as exact random matches are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
def _test(average):
    pr = Precision(average=average)

    y_pred = torch.rand(10, 5, 18, 16)
    y = torch.randint(0, 5, size=(10, 18, 16)).long()
    pr.update((y_pred, y))

    num_classes = y_pred.shape[1]
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
    np_y = y.numpy().ravel()

    assert pr._type == "multiclass"
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()

    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(pr_compute)

    pr.reset()
    y_pred = torch.rand(10, 7, 20, 12)
    y = torch.randint(0, 7, size=(10, 20, 12)).long()
    pr.update((y_pred, y))

    num_classes = y_pred.shape[1]
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
    np_y = y.numpy().ravel()

    assert pr._type == "multiclass"
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()

    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(pr_compute)

    # Batched Updates
    pr.reset()
    y_pred = torch.rand(100, 8, 12, 14)
    y = torch.randint(0, 8, size=(100, 12, 14)).long()

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for i in range(n_iters):
        idx = i * batch_size
        pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

    num_classes = y_pred.shape[1]
    np_y = y.numpy().ravel()
    np_y_pred = y_pred.argmax(dim=1).numpy().ravel()

    assert pr._type == "multiclass"
    assert isinstance(pr.compute(), float if average else torch.Tensor)
    pr_compute = pr.compute() if average else pr.compute().numpy()

    sk_average_parameter = "macro" if average else None
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
        assert sk_compute == pytest.approx(pr_compute)
def __init__(self, prefix, loss_type: str, threshold=0.5, top_k=[1, 5, 10],
             n_classes: int = None, multilabel: bool = None,
             metrics=["precision", "recall", "top_k", "accuracy"]):
    super().__init__()
    self.loss_type = loss_type.upper()
    self.threshold = threshold
    self.n_classes = n_classes
    self.multilabel = multilabel
    self.top_ks = top_k
    self.prefix = prefix

    self.metrics = {}
    for metric in metrics:
        if "precision" == metric:
            self.metrics[metric] = Precision(average=True, is_multilabel=multilabel)
        elif "recall" == metric:
            self.metrics[metric] = Recall(average=True, is_multilabel=multilabel)
        elif "top_k" in metric:
            if n_classes:
                top_k = [k for k in top_k if k < n_classes]
            if multilabel:
                self.metrics[metric] = TopKMultilabelAccuracy(k_s=top_k)
            else:
                self.metrics[metric] = TopKCategoricalAccuracy(
                    k=max(int(np.log(n_classes)), 1), output_transform=None)
        elif "macro_f1" in metric:
            self.metrics[metric] = F1(num_classes=n_classes, average="macro", multilabel=multilabel)
        elif "micro_f1" in metric:
            self.metrics[metric] = F1(num_classes=n_classes, average="micro", multilabel=multilabel)
        elif "mse" == metric:
            self.metrics[metric] = MeanSquaredError()
        elif "auroc" == metric:
            self.metrics[metric] = AUROC(num_classes=n_classes)
        elif "avg_precision" in metric:
            self.metrics[metric] = AveragePrecision(num_classes=n_classes)
        elif "accuracy" in metric:
            self.metrics[metric] = Accuracy(top_k=int(metric.split("@")[-1]) if "@" in metric else None)
        elif "ogbn" in metric:
            self.metrics[metric] = OGBNodeClfMetrics(NodeEvaluator(metric))
        elif "ogbg" in metric:
            self.metrics[metric] = OGBNodeClfMetrics(GraphEvaluator(metric))
        elif "ogbl" in metric:
            self.metrics[metric] = OGBLinkPredMetrics(LinkEvaluator(metric))
        else:
            print(f"WARNING: metric {metric} doesn't exist")
            continue  # nothing was registered for this name, skip the isinstance check

        # Register torchmetrics Metrics as submodules so they are moved to the
        # correct CUDA device during training
        if isinstance(self.metrics[metric], torchmetrics.metric.Metric):
            setattr(self, metric, self.metrics[metric])

    self.reset_metrics()
def test_compute_all_wrong():
    precision = Precision()

    y_pred = torch.FloatTensor([[1.0, 0.0], [1.0, 0.0]])
    y = torch.ones(2).type(torch.LongTensor)
    precision.update((y_pred, y))

    results = list(precision.compute())
    assert results[0] == 0.0
    assert results[1] == 0.0
def test_integration():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size, dtype="int64") % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)
    F1 = precision * recall * 2 / (precision + recall)

    precision.attach(evaluator, "precision")
    recall.attach(evaluator, "recall")
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    precision_true = precision_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    recall_true = recall_score(y_true, np.argmax(y_pred, axis=-1), average=None)
    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average=None)

    precision = state.metrics["precision"].numpy()
    recall = state.metrics["recall"].numpy()
    f1 = state.metrics["f1"].numpy()

    assert precision_true == approx(precision), "{} vs {}".format(precision_true, precision)
    assert recall_true == approx(recall), "{} vs {}".format(recall_true, recall)
    assert f1_true == approx(f1), "{} vs {}".format(f1_true, f1)
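# Hedged note (an aside, not from the original test): the arithmetic on metric
# objects above, F1 = precision * recall * 2 / (precision + recall), composes
# metrics elementwise per class. If some class has zero precision and recall,
# the division yields NaN; a common guard is a small epsilon in the
# denominator, e.g.:
#
#     F1 = precision * recall * 2 / (precision + recall + 1e-20)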
def test_wrong_inputs():
    with pytest.raises(ValueError, match=r"Beta should be a positive integer"):
        Fbeta(0.0)

    with pytest.raises(ValueError, match=r"Input precision metric should have average=False"):
        p = Precision(average=True)
        Fbeta(1.0, precision=p)

    with pytest.raises(ValueError, match=r"Input recall metric should have average=False"):
        r = Recall(average=True)
        Fbeta(1.0, recall=r)

    with pytest.raises(ValueError, match=r"If precision argument is provided, output_transform should be None"):
        p = Precision(average=False)
        Fbeta(1.0, precision=p, output_transform=lambda x: x)

    with pytest.raises(ValueError, match=r"If recall argument is provided, output_transform should be None"):
        r = Recall(average=False)
        Fbeta(1.0, recall=r, output_transform=lambda x: x)
def get_metrics(non_binary_y_target):
    metrics = {
        'accuracy': BinaryAccuracy(output_transform=zero_one_transform),
        'bce': Loss(nn.modules.loss.BCELoss()),
        'f1_score': F1_Score(output_transform=zero_one_transform),
        'roc_auc': ROC_AUC(),
        'precision': Precision(output_transform=zero_one_transform),
        'recall': Recall(output_transform=zero_one_transform),
        'conf_matrix': ConfusionMatrix(output_transform=zero_one_transform),
        # 'positive_stat': PositiveStatistics(non_binary_y_target),
    }
    return metrics
def create_task_metrics(torch_dtype, loss_func, output_transform=lambda x: x):
    if torch_dtype in (torch.float32, torch.float64):
        metrics = {'loss': Loss(loss_func, output_transform=output_transform)}
    elif torch_dtype in (torch.int16, torch.int32, torch.int64):
        metrics = {
            'accuracy': Accuracy(output_transform=output_transform),
            'precision': Precision(output_transform=output_transform),
            'recall': Recall(output_transform=output_transform),
            'loss': Loss(F.nll_loss, output_transform=output_transform),
        }
    else:
        # avoid returning an unbound name for unsupported dtypes
        raise ValueError("Unsupported torch_dtype: {}".format(torch_dtype))
    return metrics
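# Hedged usage sketch (not from the original code; only create_task_metrics is
# defined above, the rest is illustrative): a metrics dict like this is
# typically passed straight to an ignite evaluator.
def _evaluator_from_task_metrics_sketch(model, loss_func):
    from ignite.engine import create_supervised_evaluator

    metrics = create_task_metrics(torch.int64, loss_func)
    return create_supervised_evaluator(model, metrics=metrics)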
def _test(average):
    pr = Precision(average=average)

    y_pred = torch.softmax(torch.rand(4, 4), dim=1)
    y = torch.ones(4).long()
    pr.update((y_pred, y))  # first update fixes the input type as multiclass

    y_pred = torch.randint(0, 2, size=(4,))
    y = torch.ones(4).long()
    with pytest.raises(RuntimeError):
        # input type changed from multiclass to binary between updates
        pr.update((y_pred, y))
def get_evaluators(model, configuration):
    assert (
        configuration.data_type in EVALUATOR_FACTORY_MAP
    ), "Data type not in {}".format(EVALUATOR_FACTORY_MAP.keys())

    metrics = {
        "accuracy": Accuracy(_output_transform),
        "precision": Precision(_output_transform),
        "recall": Recall(_output_transform),
        "loss": Loss(get_criterion(configuration)),
        "auc": ROC_AUC(),
        "tnr": Recall(_negative_output_transform),
        "npv": Precision(_negative_output_transform),
    }

    train_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model, metrics=metrics, device=configuration.device,
    )
    val_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model, metrics=metrics, device=configuration.device,
    )
    return train_evaluator, val_evaluator
def _test(average):
    pr = Precision(average=average)

    y_pred = torch.softmax(torch.rand(4, 4), dim=1)
    y = torch.ones(4).type(torch.LongTensor)
    pr.update((y_pred, y))  # first update fixes the input type as multiclass

    y_pred = torch.rand(4, 1)
    y = torch.ones(4).type(torch.LongTensor)
    with pytest.raises(RuntimeError):
        # input type changed between updates
        pr.update((y_pred, y))
def _test_distrib_integration(device):
    import torch.distributed as dist

    rank = dist.get_rank()
    torch.manual_seed(12)

    def _test(p, r, average, n_epochs):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0, n_classes, size=(offset * dist.get_world_size(),)).to(device)
        y_preds = torch.rand(offset * dist.get_world_size(), n_classes).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, :],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset],
            )

        engine = Engine(update)

        fbeta = Fbeta(beta=2.5, average=average, precision=p, recall=r, device=device)
        fbeta.attach(engine, "f2.5")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "f2.5" in engine.state.metrics
        res = engine.state.metrics["f2.5"]
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()

        true_res = fbeta_score(
            y_true.cpu().numpy(),
            torch.argmax(y_preds, dim=1).cpu().numpy(),
            beta=2.5,
            average="macro" if average else None,
        )
        assert pytest.approx(res) == true_res

    _test(None, None, average=True, n_epochs=1)
    _test(None, None, average=True, n_epochs=2)
    precision = Precision(average=False)
    recall = Recall(average=False)
    _test(precision, recall, average=False, n_epochs=1)
    _test(precision, recall, average=False, n_epochs=2)
def _create_evaluator_engine(self):
    """Create a supervised evaluator with the standard metric set."""
    return create_supervised_evaluator(
        self.model,
        device=self.device,
        metrics={
            "Accuracy": Accuracy(),
            "Loss": Loss(self.loss),
            "Recall": Recall(average=True),
            "Top K Categorical Accuracy": TopKCategoricalAccuracy(k=10),
            "Precision": Precision(average=True),
        },
    )
def _test(average):
    pr = Precision(average=average, is_multilabel=True)

    y_pred = torch.randint(0, 2, size=(10, 5, 18, 16))
    y = torch.randint(0, 2, size=(10, 5, 18, 16)).long()
    pr.update((y_pred, y))

    np_y_pred = to_numpy_multilabel(y_pred)
    np_y = to_numpy_multilabel(y)
    assert pr._type == "multilabel"
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

    pr.reset()
    y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
    y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
    pr.update((y_pred, y))

    np_y_pred = to_numpy_multilabel(y_pred)
    np_y = to_numpy_multilabel(y)
    assert pr._type == "multilabel"
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

    # Batched Updates
    pr.reset()
    y_pred = torch.randint(0, 2, size=(100, 5, 12, 14))
    y = torch.randint(0, 2, size=(100, 5, 12, 14)).long()

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for i in range(n_iters):
        idx = i * batch_size
        pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    np_y = to_numpy_multilabel(y)
    np_y_pred = to_numpy_multilabel(y_pred)
    assert pr._type == "multilabel"
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)
def run(self, logging_dir=None, best_model_only=True):
    # assert self.model is not None, '[ERROR] No model object loaded. Please load a PyTorch model torch.nn object into the class object.'
    # assert (self.train_loader is not None) or (self.val_loader is not None), '[ERROR] You must specify data loaders.'
    for key in self.trainer_status.keys():
        assert self.trainer_status[key], \
            '[ERROR] The {} has not been generated and you cannot proceed.'.format(key)
    print('[INFO] Trainer pass OK for training.')

    # TRAIN ENGINE
    # Create the objects for training
    self.train_engine = self.create_trainer()

    # METRICS AND EVALUATION
    # Metrics - running average
    RunningAverage(output_transform=lambda x: x).attach(self.train_engine, 'loss')
    # Metrics - epochs
    metrics = {
        'accuracy': Accuracy(),
        'recall': Recall(average=True),
        'precision': Precision(average=True),
        'f1': Fbeta(beta=1),
        'topKCatAcc': TopKCategoricalAccuracy(k=5),
        'loss': Loss(self.criterion),
    }

    # Create evaluators
    self.evaluator = self.create_evaluator(metrics=metrics)
    self.train_evaluator = self.create_evaluator(metrics=metrics, tag='train')

    # LOGGING
    # Create logging to terminal
    self.add_logging()
    # Create Tensorboard logging
    self.add_tensorboard_logging(logging_dir=logging_dir)

    # CALLBACKS
    self.create_callbacks(best_model_only=best_model_only)

    # TRAIN
    print('[INFO] Executing model training...')
    self.train_engine.run(self.train_loader, max_epochs=self.config.TRAIN.NUM_EPOCHS)
    print('[INFO] Model training is complete.')
def _test(average):
    pr = Precision(average=average, is_multilabel=True)

    y_pred = torch.randint(0, 2, size=(20, 5))
    y = torch.randint(0, 2, size=(20, 5)).type(torch.LongTensor)
    pr.update((y_pred, y))

    np_y_pred = y_pred.numpy()
    np_y = y.numpy()
    assert pr._type == 'multilabel'
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average='samples') == pytest.approx(pr_compute)

    pr.reset()
    y_pred = torch.randint(0, 2, size=(10, 4))
    y = torch.randint(0, 2, size=(10, 4)).type(torch.LongTensor)
    pr.update((y_pred, y))

    np_y_pred = y_pred.numpy()
    np_y = y.numpy()
    assert pr._type == 'multilabel'
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average='samples') == pytest.approx(pr_compute)

    # Batched Updates
    pr.reset()
    y_pred = torch.randint(0, 2, size=(100, 4))
    y = torch.randint(0, 2, size=(100, 4)).type(torch.LongTensor)

    batch_size = 16
    n_iters = y.shape[0] // batch_size + 1
    for i in range(n_iters):
        idx = i * batch_size
        pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

    np_y = y.numpy()
    np_y_pred = y_pred.numpy()
    assert pr._type == 'multilabel'
    pr_compute = pr.compute() if average else pr.compute().mean().item()
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y, np_y_pred, average='samples') == pytest.approx(pr_compute)
def test_multiclass_wrong_inputs():
    pr = Precision()

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10, 5, 4), torch.randint(0, 2, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10, 5, 6), torch.randint(0, 5, size=(10, 5)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10), torch.randint(0, 5, size=(10, 5, 6)).long()))

    pr = Precision(average=True)

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5), torch.randint(0, 5, size=(10,)).long()))
        pr.update((torch.rand(10, 6), torch.randint(0, 5, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
        pr.update((torch.rand(10, 6, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))

    pr = Precision(average=False)

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5), torch.randint(0, 5, size=(10,)).long()))
        pr.update((torch.rand(10, 6), torch.randint(0, 5, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
        pr.update((torch.rand(10, 6, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
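# Hedged companion sketch (not part of the original suite): the "incompatible
# shapes between two updates" cases above fail because the first update fixes
# the number of classes; after reset() a different class count is accepted.
def test_multiclass_num_classes_after_reset_sketch():
    pr = Precision(average=True)
    pr.update((torch.rand(10, 5), torch.randint(0, 5, size=(10,)).long()))
    pr.reset()  # clears the detected input type and class count
    pr.update((torch.rand(10, 6), torch.randint(0, 6, size=(10,)).long()))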
def test_incorrect_shape():
    precision = Precision()

    y_pred = torch.zeros(2, 3, 2, 2)
    y = torch.zeros(2, 3)
    with pytest.raises(ValueError):
        precision.update((y_pred, y))

    y_pred = torch.zeros(2, 3, 2, 2)
    y = torch.zeros(2, 3, 4, 4)
    with pytest.raises(ValueError):
        precision.update((y_pred, y))
def _test(average, n_epochs, metric_device):
    n_iters = 60
    s = 16
    n_classes = 7

    offset = n_iters * s
    y_true = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)
    y_preds = torch.randint(0, 2, size=(offset * idist.get_world_size(), n_classes, 6, 8)).to(device)

    def update(engine, i):
        return (
            y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
        )

    engine = Engine(update)

    pr = Precision(average=average, is_multilabel=True, device=metric_device)
    pr.attach(engine, "pr")

    data = list(range(n_iters))
    engine.run(data=data, max_epochs=n_epochs)

    assert "pr" in engine.state.metrics
    res = engine.state.metrics["pr"]
    res2 = pr.compute()
    if isinstance(res, torch.Tensor):
        res = res.cpu().numpy()
        res2 = res2.cpu().numpy()
        assert (res == res2).all()
    else:
        assert res == res2

    np_y_preds = to_numpy_multilabel(y_preds)
    np_y_true = to_numpy_multilabel(y_true)
    assert pr._type == "multilabel"
    res = res if average else res.mean().item()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UndefinedMetricWarning)
        assert precision_score(np_y_true, np_y_preds, average="samples") == pytest.approx(res)