Example #1
def test_binary_wrong_inputs():
    pr = Precision()

    with pytest.raises(ValueError):
        # y has not only 0 or 1 values
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor),
                   torch.arange(0, 10).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # y_pred values are not thresholded to 0, 1 values
        pr.update((torch.rand(10, 1),
                   torch.randint(0, 2, size=(10,)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor),
                   torch.randint(0, 2, size=(10, 5)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10, 5, 6)).type(torch.LongTensor),
                   torch.randint(0, 2, size=(10,)).type(torch.LongTensor)))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.randint(0, 2, size=(10,)).type(torch.LongTensor),
                   torch.randint(0, 2, size=(10, 5, 6)).type(torch.LongTensor)))
Example #2
def test_binary_input(average):

    pr = Precision(average=average)
    assert pr._updated is False

    def _test(y_pred, y, batch_size):
        pr.reset()
        assert pr._updated is False

        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()

        assert pr._type == "binary"
        assert pr._updated is True
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        assert precision_score(np_y, np_y_pred, average="binary") == pytest.approx(pr_compute)

    def get_test_cases():

        test_cases = [
            # Binary input data of shape (N, 1) or (N, )
            (torch.randint(0, 2, size=(10,)), torch.randint(0, 2, size=(10,)), 1),
            (torch.randint(0, 2, size=(10, 1)), torch.randint(0, 2, size=(10, 1)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50,)), torch.randint(0, 2, size=(50,)), 16),
            (torch.randint(0, 2, size=(50, 1)), torch.randint(0, 2, size=(50, 1)), 16),
            # Binary input data of shape (N, L)
            (torch.randint(0, 2, size=(10, 5)), torch.randint(0, 2, size=(10, 5)), 1),
            (torch.randint(0, 2, size=(10, 1, 5)), torch.randint(0, 2, size=(10, 1, 5)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5)), torch.randint(0, 2, size=(50, 5)), 16),
            (torch.randint(0, 2, size=(50, 1, 5)), torch.randint(0, 2, size=(50, 1, 5)), 16),
            # Binary input data of shape (N, H, W)
            (torch.randint(0, 2, size=(10, 12, 10)), torch.randint(0, 2, size=(10, 12, 10)), 1),
            (torch.randint(0, 2, size=(10, 1, 12, 10)), torch.randint(0, 2, size=(10, 1, 12, 10)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 12, 10)), torch.randint(0, 2, size=(50, 12, 10)), 16),
            (torch.randint(0, 2, size=(50, 1, 12, 10)), torch.randint(0, 2, size=(50, 1, 12, 10)), 16),
            # Corner case with all zeros predictions
            (torch.zeros(size=(10,)), torch.randint(0, 2, size=(10,)), 1),
            (torch.zeros(size=(10, 1)), torch.randint(0, 2, size=(10, 1)), 1),
        ]

        return test_cases

    for _ in range(5):
        # check multiple random inputs as random exact occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
Example #3
def test_multilabel_input(average):

    pr = Precision(average=average, is_multilabel=True)

    def _test(y_pred, y, batch_size):
        pr.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)

        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y, np_y_pred, average="samples") == pytest.approx(pr_compute)

        pr1 = Precision(is_multilabel=True, average=True)
        pr2 = Precision(is_multilabel=True, average=False)
        pr1.update((y_pred, y))
        pr2.update((y_pred, y))
        assert pr1.compute() == pytest.approx(pr2.compute().mean().item())

    def get_test_cases():

        test_cases = [
            # Multilabel input data of shape (N, C)
            (torch.randint(0, 2, size=(10, 5)), torch.randint(0, 2, size=(10, 5)), 1),
            (torch.randint(0, 2, size=(10, 4)), torch.randint(0, 2, size=(10, 4)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5)), torch.randint(0, 2, size=(50, 5)), 16),
            (torch.randint(0, 2, size=(50, 4)), torch.randint(0, 2, size=(50, 4)), 16),
            # Multilabel input data of shape (N, C, L)
            (torch.randint(0, 2, size=(10, 5, 10)), torch.randint(0, 2, size=(10, 5, 10)), 1),
            (torch.randint(0, 2, size=(10, 4, 10)), torch.randint(0, 2, size=(10, 4, 10)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5, 10)), torch.randint(0, 2, size=(50, 5, 10)), 16),
            (torch.randint(0, 2, size=(50, 4, 10)), torch.randint(0, 2, size=(50, 4, 10)), 16),
            # Multilabel input data of shape (N, H, W, ...) and (N, C, H, W, ...)
            (torch.randint(0, 2, size=(10, 5, 18, 16)), torch.randint(0, 2, size=(10, 5, 18, 16)), 1),
            (torch.randint(0, 2, size=(10, 4, 20, 23)), torch.randint(0, 2, size=(10, 4, 20, 23)), 1),
            # updated batches
            (torch.randint(0, 2, size=(50, 5, 18, 16)), torch.randint(0, 2, size=(50, 5, 18, 16)), 16),
            (torch.randint(0, 2, size=(50, 4, 20, 23)), torch.randint(0, 2, size=(50, 4, 20, 23)), 16),
        ]

        return test_cases

    for _ in range(5):
        # check multiple random inputs as random exact occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
Example #4
    def _test(average):
        pr = Precision(average=average)

        y_pred = torch.randint(0, 2, size=(10, 4)).float()
        y = torch.randint(4, 5, size=(10,)).long()

        with pytest.raises(ValueError):
            pr.update((y_pred, y))
Example #5
def test_compute_average():
    precision = Precision(average=True)

    y_pred = torch.eye(4)
    y = torch.ones(4).type(torch.LongTensor)
    precision.update((y_pred, y))
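    # argmax of the identity y_pred predicts classes [0, 1, 2, 3]; only class 1 matches the all-ones target, so the averaged precision is 1 / 4 = 0.25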
    assert isinstance(precision.compute(), float)
    assert precision.compute() == 0.25
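A minimal side-by-side sketch (not part of the original example; it only restates what the tests on this page assert about the average flag of ignite's Precision):

# Illustrative sketch, assuming ignite.metrics.Precision as used in the examples above.
import torch
from ignite.metrics import Precision

y_pred = torch.eye(4)                # predicted classes via argmax: [0, 1, 2, 3]
y = torch.ones(4).long()             # ground truth: class 1 everywhere

pr_avg = Precision(average=True)     # compute() returns a Python float
pr_vec = Precision(average=False)    # compute() returns a per-class torch.Tensor
pr_avg.update((y_pred, y))
pr_vec.update((y_pred, y))

assert pr_avg.compute() == 0.25
assert pr_vec.compute().tolist() == [0.0, 1.0, 0.0, 0.0]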
Example #6
    def _test(average):
        pr = Precision(average=average)

        y_pred = torch.randint(0, 2, size=(10, 12, 10))
        y = torch.randint(0, 2, size=(10, 12, 10)).type(torch.LongTensor)
        pr.update((y_pred, y))
        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()
        assert pr._type == 'binary'
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        assert precision_score(np_y, np_y_pred,
                               average='binary') == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.randint(0, 2, size=(10, 1, 12, 10))
        y = torch.randint(0, 2, size=(10, 1, 12, 10)).type(torch.LongTensor)
        pr.update((y_pred, y))
        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()
        assert pr._type == 'binary'
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        assert precision_score(np_y, np_y_pred,
                               average='binary') == pytest.approx(pr_compute)

        pr = Precision(average=average)
        # Batched Updates
        pr.reset()
        y_pred = torch.randint(0, 2, size=(100, 12, 10))
        y = torch.randint(0, 2, size=(100, 1, 12, 10)).type(torch.LongTensor)

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

        np_y = y.numpy().ravel()
        np_y_pred = y_pred.numpy().ravel()
        assert pr._type == 'binary'
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        assert precision_score(np_y, np_y_pred,
                               average='binary') == pytest.approx(pr_compute)
Example #7
def metrics_estimating(tester, criterion):
    Accuracy(output_transform=thresholded_output_transform).attach(
        tester, 'accuracy')
    Recall(output_transform=thresholded_output_transform,
           average=True).attach(tester, 'recall')
    Precision(output_transform=thresholded_output_transform,
              average=True).attach(tester, 'precision')
    Loss(criterion).attach(tester, 'loss')
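For context, a self-contained usage sketch of how metrics attached this way are typically consumed; the toy model, criterion and loader below are placeholders, and only the ignite calls mirror the examples on this page:

# Hedged usage sketch; model/criterion/loader are made-up placeholders, not from the original code.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from ignite.engine import create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, Precision, Recall

model = nn.Linear(4, 3)                       # toy multiclass classifier
criterion = nn.CrossEntropyLoss()
loader = DataLoader(TensorDataset(torch.randn(32, 4), torch.randint(0, 3, (32,))), batch_size=8)

evaluator = create_supervised_evaluator(
    model,
    metrics={
        "accuracy": Accuracy(),
        "precision": Precision(average=True),
        "recall": Recall(average=True),
        "loss": Loss(criterion),
    },
)
state = evaluator.run(loader)
print(state.metrics)                          # {'accuracy': ..., 'precision': ..., 'recall': ..., 'loss': ...}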
Example #8
def test_integration():

    def _test(p, r, average, output_transform):
        np.random.seed(1)

        n_iters = 10
        batch_size = 10
        n_classes = 10

        y_true = np.arange(0, n_iters * batch_size) % n_classes
        y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
        for i in range(n_iters * batch_size):
            if np.random.rand() > 0.4:
                y_pred[i, y_true[i]] = 1.0
            else:
                j = np.random.randint(0, n_classes)
                y_pred[i, j] = 0.7

        y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
        y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

        def update_fn(engine, batch):
            y_true_batch = next(y_true_batch_values)
            y_pred_batch = next(y_pred_batch_values)
            if output_transform is not None:
                return {
                    'y_pred': torch.from_numpy(y_pred_batch),
                    'y': torch.from_numpy(y_true_batch)
                }
            return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

        evaluator = Engine(update_fn)

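        # when p and r are None, Fbeta creates its own Precision/Recall internally; otherwise it reuses the externally constructed average=False metrics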
        f2 = Fbeta(beta=2.0, average=average, precision=p, recall=r, output_transform=output_transform)
        f2.attach(evaluator, "f2")

        data = list(range(n_iters))
        state = evaluator.run(data, max_epochs=1)

        f2_true = fbeta_score(y_true, np.argmax(y_pred, axis=-1),
                              average='macro' if average else None, beta=2.0)
        if isinstance(state.metrics['f2'], torch.Tensor):
            np.testing.assert_allclose(f2_true, state.metrics['f2'].numpy())
        else:
            assert f2_true == pytest.approx(state.metrics['f2']), "{} vs {}".format(f2_true, state.metrics['f2'])

    _test(None, None, False, output_transform=None)
    _test(None, None, True, output_transform=None)

    def output_transform(output):
        return output['y_pred'], output['y']

    _test(None, None, False, output_transform=output_transform)
    _test(None, None, True, output_transform=output_transform)
    precision = Precision(average=False)
    recall = Recall(average=False)
    _test(precision, recall, False, None)
    _test(precision, recall, True, None)
Example #9
    def folds(self, kf):
        model = BGRU(self.input_size, self.hidden_size, self.num_layers,
                     self.num_classes, self.batch_size, self.dropout)
        loss = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.learning_rate)
        train_loader, valid_loader = _get_data_loader(kf, self.batch_size)
        trainer = create_supervised_trainer(model,
                                            optimizer,
                                            loss,
                                            device=DEVICE)
        evaluator = create_supervised_evaluator(model,
                                                metrics={
                                                    'acc': CategoricalAccuracy(),
                                                    'loss': Loss(loss),
                                                    'prec': Precision(average=True),
                                                    'recall': Recall(average=True)
                                                },
                                                device=DEVICE)

        @trainer.on(Events.ITERATION_COMPLETED)
        def log_training_loss(trainer):
            iter_num = trainer.state.iteration
            if iter_num % 10 == 0:
                logger.info("Epoch[{}] Iter: {} Loss: {:.2f}".format(
                    trainer.state.epoch, iter_num, trainer.state.output))

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_training_results(trainer):
            evaluator.run(train_loader)
            metrics = evaluator.state.metrics
            f1 = (2 * metrics['prec'] *
                  metrics['recall']) / (metrics['prec'] + metrics['recall'])
            logger.info(
                "Train Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f} Avg Recall: {:.2f} Avg F1 Score: {:.2f}"
                .format(trainer.state.epoch, metrics['acc'], metrics['loss'],
                        metrics['prec'], metrics['recall'], f1))

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(trainer):
            evaluator.run(valid_loader)
            metrics = evaluator.state.metrics
            f1 = (2 * metrics['prec'] *
                  metrics['recall']) / (metrics['prec'] + metrics['recall'])
            for k in self.res.keys():
                if k != 'f1':
                    self.res[k].append(metrics[k])
                else:
                    self.res[k].append(f1)
            logger.info(
                "Valid Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f} Avg Precision: {:.2f} Avg Recall: {:.2f} Avg F1 Score: {:.2f}"
                .format(trainer.state.epoch, metrics['acc'], metrics['loss'],
                        metrics['prec'], metrics['recall'], f1))

        trainer.run(train_loader, max_epochs=self.num_epochs)
        return model
Example #10
def test_integration():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size) % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)

    def Fbeta(r, p, beta):
        return torch.mean((1 + beta**2) * p * r / (beta**2 * p + r)).item()

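    # MetricsLambda applies Fbeta to the computed per-class recall/precision tensors (beta=1), so F1 is evaluated lazily together with the attached metrics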
    F1 = MetricsLambda(Fbeta, recall, precision, 1)

    precision.attach(evaluator, "precision")
    recall.attach(evaluator, "recall")
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    precision_true = precision_score(y_true,
                                     np.argmax(y_pred, axis=-1),
                                     average=None)
    recall_true = recall_score(y_true,
                               np.argmax(y_pred, axis=-1),
                               average=None)
    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average='macro')

    precision = state.metrics['precision'].numpy()
    recall = state.metrics['recall'].numpy()

    assert precision_true == approx(precision), "{} vs {}".format(
        precision_true, precision)
    assert recall_true == approx(recall), "{} vs {}".format(
        recall_true, recall)
    assert f1_true == approx(state.metrics['f1']), "{} vs {}".format(
        f1_true, state.metrics['f1'])
Example #11
def test_multiclass_input(average):

    pr = Precision(average=average)

    def _test(y_pred, y, batch_size):
        pr.reset()
        if batch_size > 1:
            n_iters = y.shape[0] // batch_size + 1
            for i in range(n_iters):
                idx = i * batch_size
                pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))
        else:
            pr.update((y_pred, y))

        num_classes = y_pred.shape[1]
        np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
        np_y = y.numpy().ravel()

        assert pr._type == "multiclass"
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        sk_average_parameter = "macro" if average else None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(pr_compute)

    def get_test_cases():

        test_cases = [
            # Multiclass input data of shape (N, ) and (N, C)
            (torch.rand(10, 6), torch.randint(0, 6, size=(10,)), 1),
            (torch.rand(10, 4), torch.randint(0, 4, size=(10,)), 1),
            # updated batches
            (torch.rand(50, 6), torch.randint(0, 6, size=(50,)), 16),
            (torch.rand(50, 4), torch.randint(0, 4, size=(50,)), 16),
            # Multiclass input data of shape (N, L) and (N, C, L)
            (torch.rand(10, 5, 8), torch.randint(0, 5, size=(10, 8)), 1),
            (torch.rand(10, 8, 12), torch.randint(0, 8, size=(10, 12)), 1),
            # updated batches
            (torch.rand(50, 5, 8), torch.randint(0, 5, size=(50, 8)), 16),
            (torch.rand(50, 8, 12), torch.randint(0, 8, size=(50, 12)), 16),
            # Multiclass input data of shape (N, H, W, ...) and (N, C, H, W, ...)
            (torch.rand(10, 5, 18, 16), torch.randint(0, 5, size=(10, 18, 16)), 1),
            (torch.rand(10, 7, 20, 12), torch.randint(0, 7, size=(10, 20, 12)), 1),
            # updated batches
            (torch.rand(50, 5, 18, 16), torch.randint(0, 5, size=(50, 18, 16)), 16),
            (torch.rand(50, 7, 20, 12), torch.randint(0, 7, size=(50, 20, 12)), 16),
        ]

        return test_cases

    for _ in range(5):
        # check multiple random inputs as random exact occurrences are rare
        test_cases = get_test_cases()
        for y_pred, y, batch_size in test_cases:
            _test(y_pred, y, batch_size)
Example #12
    def _test(average):
        pr = Precision(average=average)

        y_pred = torch.rand(10, 5, 18, 16)
        y = torch.randint(0, 5, size=(10, 18, 16)).long()
        pr.update((y_pred, y))
        num_classes = y_pred.shape[1]
        np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
        np_y = y.numpy().ravel()
        assert pr._type == "multiclass"
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        sk_average_parameter = "macro" if average else None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.rand(10, 7, 20, 12)
        y = torch.randint(0, 7, size=(10, 20, 12)).long()
        pr.update((y_pred, y))
        num_classes = y_pred.shape[1]
        np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
        np_y = y.numpy().ravel()
        assert pr._type == "multiclass"
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        sk_average_parameter = "macro" if average else None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(pr_compute)

        # Batched Updates
        pr.reset()
        y_pred = torch.rand(100, 8, 12, 14)
        y = torch.randint(0, 8, size=(100, 12, 14)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx : idx + batch_size], y[idx : idx + batch_size]))

        num_classes = y_pred.shape[1]
        np_y = y.numpy().ravel()
        np_y_pred = y_pred.argmax(dim=1).numpy().ravel()
        assert pr._type == "multiclass"
        assert isinstance(pr.compute(), float if average else torch.Tensor)
        pr_compute = pr.compute() if average else pr.compute().numpy()
        sk_average_parameter = "macro" if average else None
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            sk_compute = precision_score(np_y, np_y_pred, labels=range(0, num_classes), average=sk_average_parameter)
            assert sk_compute == pytest.approx(pr_compute)
Example #13
    def __init__(self, prefix, loss_type: str, threshold=0.5, top_k=[1, 5, 10], n_classes: int = None,
                 multilabel: bool = None, metrics=["precision", "recall", "top_k", "accuracy"]):
        super().__init__()

        self.loss_type = loss_type.upper()
        self.threshold = threshold
        self.n_classes = n_classes
        self.multilabel = multilabel
        self.top_ks = top_k
        self.prefix = prefix

        self.metrics = {}
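        # build a name -> metric registry mixing ignite and torchmetrics objects; unrecognized names only trigger the warning below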
        for metric in metrics:
            if "precision" == metric:
                self.metrics[metric] = Precision(average=True, is_multilabel=multilabel)
            elif "recall" == metric:
                self.metrics[metric] = Recall(average=True, is_multilabel=multilabel)

            elif "top_k" in metric:
                if n_classes:
                    top_k = [k for k in top_k if k < n_classes]

                if multilabel:
                    self.metrics[metric] = TopKMultilabelAccuracy(k_s=top_k)
                else:
                    self.metrics[metric] = TopKCategoricalAccuracy(k=max(int(np.log(n_classes)), 1),
                                                                   output_transform=None)
            elif "macro_f1" in metric:
                self.metrics[metric] = F1(num_classes=n_classes, average="macro", multilabel=multilabel)
            elif "micro_f1" in metric:
                self.metrics[metric] = F1(num_classes=n_classes, average="micro", multilabel=multilabel)
            elif "mse" == metric:
                self.metrics[metric] = MeanSquaredError()
            elif "auroc" == metric:
                self.metrics[metric] = AUROC(num_classes=n_classes)
            elif "avg_precision" in metric:
                self.metrics[metric] = AveragePrecision(num_classes=n_classes)

            elif "accuracy" in metric:
                self.metrics[metric] = Accuracy(top_k=int(metric.split("@")[-1]) if "@" in metric else None)

            elif "ogbn" in metric:
                self.metrics[metric] = OGBNodeClfMetrics(NodeEvaluator(metric))
            elif "ogbg" in metric:
                self.metrics[metric] = OGBNodeClfMetrics(GraphEvaluator(metric))
            elif "ogbl" in metric:
                self.metrics[metric] = OGBLinkPredMetrics(LinkEvaluator(metric))
            else:
                print(f"WARNING: metric {metric} doesn't exist")

            # Needed to add the PytorchGeometric methods as Modules, so they'll be on the correct CUDA device during training
            if isinstance(self.metrics[metric], torchmetrics.metric.Metric):
                setattr(self, metric, self.metrics[metric])

        self.reset_metrics()
Example #14
def test_compute_all_wrong():
    precision = Precision()

    y_pred = torch.FloatTensor([[1.0, 0.0], [1.0, 0.0]])
    y = torch.ones(2).type(torch.LongTensor)
    precision.update((y_pred, y))

    results = list(precision.compute())

    assert results[0] == 0.0
    assert results[1] == 0.0
Example #15
def test_integration():
    np.random.seed(1)

    n_iters = 10
    batch_size = 10
    n_classes = 10

    y_true = np.arange(0, n_iters * batch_size, dtype="int64") % n_classes
    y_pred = 0.2 * np.random.rand(n_iters * batch_size, n_classes)
    for i in range(n_iters * batch_size):
        if np.random.rand() > 0.4:
            y_pred[i, y_true[i]] = 1.0
        else:
            j = np.random.randint(0, n_classes)
            y_pred[i, j] = 0.7

    y_true_batch_values = iter(y_true.reshape(n_iters, batch_size))
    y_pred_batch_values = iter(y_pred.reshape(n_iters, batch_size, n_classes))

    def update_fn(engine, batch):
        y_true_batch = next(y_true_batch_values)
        y_pred_batch = next(y_pred_batch_values)
        return torch.from_numpy(y_pred_batch), torch.from_numpy(y_true_batch)

    evaluator = Engine(update_fn)

    precision = Precision(average=False)
    recall = Recall(average=False)
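    # arithmetic on ignite metric objects produces a MetricsLambda, here the element-wise per-class F1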
    F1 = precision * recall * 2 / (precision + recall)

    precision.attach(evaluator, "precision")
    recall.attach(evaluator, "recall")
    F1.attach(evaluator, "f1")

    data = list(range(n_iters))
    state = evaluator.run(data, max_epochs=1)

    precision_true = precision_score(y_true,
                                     np.argmax(y_pred, axis=-1),
                                     average=None)
    recall_true = recall_score(y_true,
                               np.argmax(y_pred, axis=-1),
                               average=None)
    f1_true = f1_score(y_true, np.argmax(y_pred, axis=-1), average=None)

    precision = state.metrics["precision"].numpy()
    recall = state.metrics["recall"].numpy()
    f1 = state.metrics["f1"].numpy()

    assert precision_true == approx(precision), "{} vs {}".format(
        precision_true, precision)
    assert recall_true == approx(recall), "{} vs {}".format(
        recall_true, recall)
    assert f1_true == approx(f1), "{} vs {}".format(f1_true, f1)
Example #16
def test_wrong_inputs():

    with pytest.raises(ValueError, match=r"Beta should be a positive integer"):
        Fbeta(0.0)

    with pytest.raises(ValueError, match=r"Input precision metric should have average=False"):
        p = Precision(average=True)
        Fbeta(1.0, precision=p)

    with pytest.raises(ValueError, match=r"Input recall metric should have average=False"):
        r = Recall(average=True)
        Fbeta(1.0, recall=r)

    with pytest.raises(ValueError, match=r"If precision argument is provided, output_transform should be None"):
        p = Precision(average=False)
        Fbeta(1.0, precision=p, output_transform=lambda x: x)

    with pytest.raises(ValueError, match=r"If recall argument is provided, output_transform should be None"):
        r = Recall(average=False)
        Fbeta(1.0, recall=r, output_transform=lambda x: x)
Example #17
def test_wrong_inputs():

    with pytest.raises(ValueError, match=r"Beta should be a positive integer"):
        Fbeta(0.0)

    with pytest.raises(ValueError, match=r"Input precision metric should have average=False"):
        p = Precision(average=True)
        Fbeta(1.0, precision=p)

    with pytest.raises(ValueError, match=r"Input recall metric should have average=False"):
        r = Recall(average=True)
        Fbeta(1.0, recall=r)
Example #18
def get_metrics(non_binary_y_target):
    metrics = {
        'accuracy': BinaryAccuracy(output_transform=zero_one_transform),
        'bce': Loss(nn.modules.loss.BCELoss()),
        'f1_score': F1_Score(output_transform=zero_one_transform),
        'roc_auc': ROC_AUC(),
        'precision': Precision(output_transform=zero_one_transform),
        'recall': Recall(output_transform=zero_one_transform),
        'conf_matrix': ConfusionMatrix(output_transform=zero_one_transform),
        # 'positive_stat':    PositiveStatistics(non_binary_y_target),
    }
    return metrics
Example #19
def create_task_metrics(torch_dtype, loss_func, output_transform=lambda x: x):
    if torch_dtype in (torch.float32, torch.float64):
        metrics = {'loss': Loss(loss_func, output_transform=output_transform)}

    elif torch_dtype in (torch.int16, torch.int32, torch.int64):
        metrics = {
            'accuracy': Accuracy(output_transform=output_transform),
            'precision': Precision(output_transform=output_transform),
            'recall': Recall(output_transform=output_transform),
            'loss': Loss(F.nll_loss, output_transform=output_transform)
        }
    else:
        raise ValueError('Unsupported torch_dtype: {}'.format(torch_dtype))
    return metrics
Example #20
    def _test(average):
        pr = Precision(average=average)

        y_pred = torch.softmax(torch.rand(4, 4), dim=1)
        y = torch.ones(4).long()
        pr.update((y_pred, y))

        y_pred = torch.randint(0, 2, size=(4,))
        y = torch.ones(4).long()

        with pytest.raises(RuntimeError):
            pr.update((y_pred, y))
Example #21
def get_evaluators(model, configuration):
    assert (
        configuration.data_type in EVALUATOR_FACTORY_MAP
    ), "Data type not in {}".format(EVALUATOR_FACTORY_MAP.keys())
    metrics = {
        "accuracy": Accuracy(_output_transform),
        "precision": Precision(_output_transform),
        "recall": Recall(_output_transform),
        "loss": Loss(get_criterion(configuration)),
        "auc": ROC_AUC(),
        "tnr": Recall(_negative_output_transform),
        "npv": Precision(_negative_output_transform),
    }
    train_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model, metrics=metrics, device=configuration.device,
    )
    val_evaluator = EVALUATOR_FACTORY_MAP[configuration.data_type](
        model, metrics=metrics, device=configuration.device,
    )

    return train_evaluator, val_evaluator
Example #22
    def _test(average):
        pr = Precision(average=average)

        y_pred = torch.softmax(torch.rand(4, 4), dim=1)
        y = torch.ones(4).type(torch.LongTensor)
        pr.update((y_pred, y))

        y_pred = torch.rand(4, 1)
        y = torch.ones(4).type(torch.LongTensor)

        with pytest.raises(RuntimeError):
            pr.update((y_pred, y))
Example #23
def _test_distrib_itegration(device):
    import torch.distributed as dist

    rank = dist.get_rank()
    torch.manual_seed(12)

    def _test(p, r, average, n_epochs):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0,
                               n_classes,
                               size=(offset *
                                     dist.get_world_size(), )).to(device)
        y_preds = torch.rand(offset * dist.get_world_size(),
                             n_classes).to(device)

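        # each rank consumes its own contiguous block of offset samples, so together the ranks cover the whole synthetic dataset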
        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset, :],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset],
            )

        engine = Engine(update)

        fbeta = Fbeta(beta=2.5, average=average, precision=p, recall=r, device=device)
        fbeta.attach(engine, "f2.5")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "f2.5" in engine.state.metrics
        res = engine.state.metrics["f2.5"]
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()

        true_res = fbeta_score(
            y_true.cpu().numpy(),
            torch.argmax(y_preds, dim=1).cpu().numpy(),
            beta=2.5,
            average="macro" if average else None,
        )

        assert pytest.approx(res) == true_res

    _test(None, None, average=True, n_epochs=1)
    _test(None, None, average=True, n_epochs=2)
    precision = Precision(average=False)
    recall = Recall(average=False)
    _test(precision, recall, average=False, n_epochs=1)
    _test(precision, recall, average=False, n_epochs=2)
Example #24
    def _create_evaluator_engine(self):
        """ """
        return create_supervised_evaluator(
            self.model,
            device=self.device,
            metrics={
                "Accuracy": Accuracy(),
                "Loss": Loss(self.loss),
                "Recall": Recall(average=True),
                "Top K Categorical Accuracy": TopKCategoricalAccuracy(k=10),
                "Precision": Precision(average=True),
            },
        )
Example #25
    def _test(average):
        pr = Precision(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(10, 5, 18, 16))
        y = torch.randint(0, 2, size=(10, 5, 18, 16)).long()
        pr.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average="samples") == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.randint(0, 2, size=(10, 4, 20, 23))
        y = torch.randint(0, 2, size=(10, 4, 20, 23)).long()
        pr.update((y_pred, y))
        np_y_pred = to_numpy_multilabel(y_pred)
        np_y = to_numpy_multilabel(y)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average="samples") == pytest.approx(pr_compute)

        # Batched Updates
        pr.reset()
        y_pred = torch.randint(0, 2, size=(100, 5, 12, 14))
        y = torch.randint(0, 2, size=(100, 5, 12, 14)).long()

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

        np_y = to_numpy_multilabel(y)
        np_y_pred = to_numpy_multilabel(y_pred)
        assert pr._type == "multilabel"
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average="samples") == pytest.approx(pr_compute)
Example #26
    def run(self, logging_dir=None, best_model_only=True):

        #assert self.model is not None, '[ERROR] No model object loaded. Please load a PyTorch model torch.nn object into the class object.'
        #assert (self.train_loader is not None) or (self.val_loader is not None), '[ERROR] You must specify data loaders.'

        for key in self.trainer_status.keys():
            assert self.trainer_status[
                key], '[ERROR] The {} has not been generated and you cannot proceed.'.format(
                    key)
        print('[INFO] Trainer pass OK for training.')

        # TRAIN ENGINE
        # Create the objects for training
        self.train_engine = self.create_trainer()

        # METRICS AND EVALUATION
        # Metrics - running average
        RunningAverage(output_transform=lambda x: x).attach(
            self.train_engine, 'loss')

        # Metrics - epochs
        metrics = {
            'accuracy': Accuracy(),
            'recall': Recall(average=True),
            'precision': Precision(average=True),
            'f1': Fbeta(beta=1),
            'topKCatAcc': TopKCategoricalAccuracy(k=5),
            'loss': Loss(self.criterion)
        }

        # Create evaluators
        self.evaluator = self.create_evaluator(metrics=metrics)
        self.train_evaluator = self.create_evaluator(metrics=metrics,
                                                     tag='train')

        # LOGGING
        # Create logging to terminal
        self.add_logging()

        # Create Tensorboard logging
        self.add_tensorboard_logging(logging_dir=logging_dir)

        ## CALLBACKS
        self.create_callbacks(best_model_only=best_model_only)

        ## TRAIN
        # Train the model
        print('[INFO] Executing model training...')
        self.train_engine.run(self.train_loader,
                              max_epochs=self.config.TRAIN.NUM_EPOCHS)
        print('[INFO] Model training is complete.')
Example #27
    def _test(average):
        pr = Precision(average=average, is_multilabel=True)

        y_pred = torch.randint(0, 2, size=(20, 5))
        y = torch.randint(0, 2, size=(20, 5)).type(torch.LongTensor)
        pr.update((y_pred, y))
        np_y_pred = y_pred.numpy()
        np_y = y.numpy()
        assert pr._type == 'multilabel'
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average='samples') == pytest.approx(pr_compute)

        pr.reset()
        y_pred = torch.randint(0, 2, size=(10, 4))
        y = torch.randint(0, 2, size=(10, 4)).type(torch.LongTensor)
        pr.update((y_pred, y))
        np_y_pred = y_pred.numpy()
        np_y = y.numpy()
        assert pr._type == 'multilabel'
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average='samples') == pytest.approx(pr_compute)

        # Batched Updates
        pr.reset()
        y_pred = torch.randint(0, 2, size=(100, 4))
        y = torch.randint(0, 2, size=(100, 4)).type(torch.LongTensor)

        batch_size = 16
        n_iters = y.shape[0] // batch_size + 1

        for i in range(n_iters):
            idx = i * batch_size
            pr.update((y_pred[idx:idx + batch_size], y[idx:idx + batch_size]))

        np_y = y.numpy()
        np_y_pred = y_pred.numpy()
        assert pr._type == 'multilabel'
        pr_compute = pr.compute() if average else pr.compute().mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(
                np_y, np_y_pred,
                average='samples') == pytest.approx(pr_compute)
Example #28
def test_multiclass_wrong_inputs():
    pr = Precision()

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10, 5, 4), torch.randint(0, 2, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10, 5, 6), torch.randint(0, 5, size=(10, 5)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes
        pr.update((torch.rand(10), torch.randint(0, 5, size=(10, 5, 6)).long()))

    pr = Precision(average=True)

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5), torch.randint(0, 5, size=(10,)).long()))
        pr.update((torch.rand(10, 6), torch.randint(0, 5, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
        pr.update((torch.rand(10, 6, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))

    pr = Precision(average=False)

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5), torch.randint(0, 5, size=(10,)).long()))
        pr.update((torch.rand(10, 6), torch.randint(0, 5, size=(10,)).long()))

    with pytest.raises(ValueError):
        # incompatible shapes between two updates
        pr.update((torch.rand(10, 5, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
        pr.update((torch.rand(10, 6, 12, 14), torch.randint(0, 5, size=(10, 12, 14)).long()))
Example #29
def test_incorrect_shape():
    precision = Precision()

    y_pred = torch.zeros(2, 3, 2, 2)
    y = torch.zeros(2, 3)

    with pytest.raises(ValueError):
        precision.update((y_pred, y))

    y_pred = torch.zeros(2, 3, 2, 2)
    y = torch.zeros(2, 3, 4, 4)

    with pytest.raises(ValueError):
        precision.update((y_pred, y))
Example #30
    def _test(average, n_epochs, metric_device):
        n_iters = 60
        s = 16
        n_classes = 7

        offset = n_iters * s
        y_true = torch.randint(0,
                               2,
                               size=(offset * idist.get_world_size(),
                                     n_classes, 6, 8)).to(device)
        y_preds = torch.randint(0,
                                2,
                                size=(offset * idist.get_world_size(),
                                      n_classes, 6, 8)).to(device)

        def update(engine, i):
            return (
                y_preds[i * s + rank * offset:(i + 1) * s + rank * offset,
                        ...],
                y_true[i * s + rank * offset:(i + 1) * s + rank * offset, ...],
            )

        engine = Engine(update)

        pr = Precision(average=average,
                       is_multilabel=True,
                       device=metric_device)
        pr.attach(engine, "pr")

        data = list(range(n_iters))
        engine.run(data=data, max_epochs=n_epochs)

        assert "pr" in engine.state.metrics
        res = engine.state.metrics["pr"]
        res2 = pr.compute()
        if isinstance(res, torch.Tensor):
            res = res.cpu().numpy()
            res2 = res2.cpu().numpy()
            assert (res == res2).all()
        else:
            assert res == res2

        np_y_preds = to_numpy_multilabel(y_preds)
        np_y_true = to_numpy_multilabel(y_true)
        assert pr._type == "multilabel"
        res = res if average else res.mean().item()
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            assert precision_score(np_y_true, np_y_preds,
                                   average="samples") == pytest.approx(res)