def test_fscore_empty(test_mask_batch):
    num_classes = 6
    # shift every label by one class so that no prediction matches the target
    pred_classes = (test_mask_batch - 1) % num_classes
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=num_classes,
                                          ignore_index=255,
                                          reduction=False)
    score = func.f1_score(tp, fp, fn, reduce=True)
    # with zero true positives everywhere, the reduced F1 must be exactly 0
    assert score == 0.0
    LOG.debug(score)
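# The `test_mask_batch` fixture is defined outside this section (the module
# is assumed to already import torch and pytest). A minimal sketch of what
# these tests appear to assume, as a hypothetical reconstruction rather than
# the project's actual conftest: a random integer mask of shape (4, 4, 4)
# with class indices in [0, 6).
@pytest.fixture
def test_mask_batch():
    # hypothetical: 4 masks of 4x4 pixels, 6 classes, no ignored pixels
    return torch.randint(0, 6, size=(4, 4, 4))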
def test_fscore_best():
    num_classes = 6
    test_mask_batch = torch.randint(0, num_classes, size=(4, 4, 4))
    # perfect predictions: targets and predictions are identical
    pred_classes = test_mask_batch.clone()
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=num_classes,
                                          ignore_index=255,
                                          reduction=False)
    score = func.f1_score(tp, fp, fn, reduce=False)
    LOG.debug(score)
    # every populated class should score a (near-)perfect F1
    assert torch.all(score[:4] >= 0.99)
def compute(self) -> torch.Tensor:
    """Computes the F1 score over every device, using the accumulated statistics.
    The same micro- and macro-averaging considerations hold for this metric as well.

    :return: tensor with empty size when reduced, or (C,) where C is the number of classes
    :rtype: torch.Tensor
    """
    score = func.f1_score(tp=self.tp, fp=self.fp, fn=self.fn, reduce=self.is_micro)
    if self.reduction == ReductionType.MACRO.value:
        score = score.mean()
    return score
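# For reference, a self-contained sketch of the statistics-to-score step that
# `func.f1_score` presumably performs, inferred from the call sites in this
# section rather than taken from the library itself. Per class,
# F1 = 2*TP / (2*TP + FP + FN); the micro average pools the counts over all
# classes before applying the same formula.
def f1_from_statistics(tp: torch.Tensor,
                       fp: torch.Tensor,
                       fn: torch.Tensor,
                       reduce: bool = False) -> torch.Tensor:
    if reduce:
        # micro-average: sum the per-class counts into scalars first
        tp, fp, fn = tp.sum(), fp.sum(), fn.sum()
    denominator = (2 * tp + fp + fn).clamp(min=1)  # guard against 0/0
    return 2 * tp / denominator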
def test_fscore_batch_macro(test_pred_batch, test_mask_batch):
    pred_classes = test_pred_batch.argmax(dim=1)
    assert pred_classes.shape == (4, 4, 4)
    tp, fp, tn, fn = func.statistics_step(pred_classes,
                                          test_mask_batch,
                                          num_classes=6,
                                          ignore_index=255,
                                          reduction=False)
    fscore = func.f1_score(tp, fp, fn, reduce=True)
    y_true, y_pred = func.valid_samples(255, test_mask_batch, pred_classes)
    skl_fscore = f1_score(y_true.cpu().numpy(), y_pred.cpu().numpy(), average="macro")
    LOG.debug("sklearn: %s - custom: %s", str(skl_fscore), str(fscore))
    # sklearn does not account for empty classes
    diff = abs(skl_fscore - fscore.item())
    assert diff <= EPS
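# `func.valid_samples` is not shown in this section; judging by its use
# above, it flattens targets and predictions and drops every position
# labelled with the ignore index. A hypothetical equivalent:
def valid_samples_sketch(ignore_index: int,
                         target: torch.Tensor,
                         pred: torch.Tensor) -> tuple:
    keep = target != ignore_index  # boolean mask of valid pixels
    # boolean indexing flattens both tensors to 1D views of the kept pixels
    return target[keep], pred[keep]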