def test_fbeta_multilabel_with_weighted_average(self, device: str):
    """Compare weighted-average scores for labels 0 and 1 against
    `precision_recall_fscore_support` run on thresholded predictions.
    """
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    labels = [0, 1]
    fbeta = FBetaMultiLabelMeasure(average="weighted", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    # Binarize the raw scores with the metric's own threshold so the
    # reference implementation sees the same hard decisions.
    binarized = torch.where(
        self.predictions >= fbeta._threshold,
        torch.ones_like(self.predictions),
        torch.zeros_like(self.predictions),
    )
    reference = precision_recall_fscore_support(
        self.targets.cpu().numpy(),
        binarized.cpu().numpy(),
        labels=labels,
        average="weighted",
    )
    # The fourth element of the reference tuple is not used here.
    expected = dict(zip(("precision", "recall", "fscore"), reference[:3]))

    # check value
    for key, value in expected.items():
        assert_allclose(metric[key], value)
def test_fbeta_multilabel_with_micro_average(self, device: str):
    """Check micro-averaged precision, recall and F-score restricted to
    labels 1 and 3 against values computed from hand-written counts.
    """
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    labels = [1, 3]
    fbeta = FBetaMultiLabelMeasure(average="micro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    # The expected values are built on CPU, without a `device` argument.
    # Each tensor holds one hand-written count per selected label; only the
    # means are needed below.
    tp_mean = torch.tensor([3, 4], dtype=torch.float32).mean()
    fp_mean = torch.tensor([0, 0], dtype=torch.float32).mean()
    fn_mean = torch.tensor([2, 0], dtype=torch.float32).mean()

    expected_precision = tp_mean / (tp_mean + fp_mean)
    expected_recall = tp_mean / (tp_mean + fn_mean)
    expected_fscore = (2 * expected_precision * expected_recall) / (
        expected_precision + expected_recall
    )

    # check value
    checks = (
        ("precision", expected_precision),
        ("recall", expected_recall),
        ("fscore", expected_fscore),
    )
    for key, value in checks:
        assert_allclose(metric[key], value)
def test_fbeta_multilabel_with_mask(self, device: str):
    """Run the metric with a mask whose last entry is False and verify both
    the accumulated counts and the resulting per-label scores.
    """
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)
    mask = torch.tensor(
        [True, True, True, True, True, False], device=device
    ).unsqueeze(-1)

    fbeta = FBetaMultiLabelMeasure()
    fbeta(self.predictions, self.targets, mask)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    # Per-label counts expected in the metric's internal accumulators.
    pred_counts = [3, 3, 3, 4, 1]
    true_counts = [4, 5, 2, 4, 0]
    hit_counts = [3, 3, 2, 4, 0]

    assert_allclose(fbeta._pred_sum.tolist(), pred_counts)
    assert_allclose(fbeta._true_sum.tolist(), true_counts)
    assert_allclose(fbeta._true_positive_sum.tolist(), hit_counts)

    # Precision = hits / predicted, recall = hits / true; a zero
    # denominator is expected to yield 0.0.
    expected_precisions = [
        hits / total if total else 0.0 for hits, total in zip(hit_counts, pred_counts)
    ]
    expected_recalls = [
        hits / total if total else 0.0 for hits, total in zip(hit_counts, true_counts)
    ]
    expected_fscores = [
        0.0 if p + r == 0.0 else (2 * p * r) / (p + r)
        for p, r in zip(expected_precisions, expected_recalls)
    ]

    assert_allclose(precisions, expected_precisions)
    assert_allclose(recalls, expected_recalls)
    assert_allclose(fscores, expected_fscores)
def test_fbeta_multilabel_handles_batch_size_of_one(self, device: str):
    """Feed a single masked-in example and check per-label precision and recall."""
    predictions = torch.tensor([[0.2862, 0.5479, 0.1627, 0.2033]], device=device)
    targets = torch.tensor([[0, 1, 0, 0]], device=device)
    mask = torch.tensor([[True]], device=device)

    fbeta = FBetaMultiLabelMeasure()
    fbeta(predictions, targets, mask)
    metric = fbeta.get_metric()

    # Only the second label is expected to score; precision and recall
    # share the same expected vector.
    expected = [0.0, 1.0, 0.0, 0.0]
    for key in ("precision", "recall"):
        assert_allclose(metric[key], expected)
def test_fbeta_multilabel_handles_no_prediction_true_all_class(self, device: str):
    """All scores must be zero when no prediction matches the gold labels."""
    # The all-zero second row is presumably meant to yield no predicted
    # label at all (assumes a positive decision threshold).
    predictions = torch.tensor([[0.65, 0.35], [0.0, 0.0]], device=device)
    targets = torch.tensor([[0, 1], [0, 1]], device=device)

    fbeta = FBetaMultiLabelMeasure()
    fbeta(predictions, targets)
    metric = fbeta.get_metric()

    zeros = [0.0, 0.0]
    for key in ("precision", "recall", "fscore"):
        assert_allclose(metric[key], zeros)
def test_fbeta_multilabel_with_explicit_labels(self, device: str):
    """Passing labels in reverse order must return the scores in reverse order."""
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    # Same predictions as the default case, but with an explicit label ordering.
    fbeta = FBetaMultiLabelMeasure(labels=[4, 3, 2, 1, 0])
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    # The fixture values are reversed to line up with the reversed labels.
    expected = (
        ("precision", self.desired_precisions[::-1]),
        ("recall", self.desired_recalls[::-1]),
        ("fscore", self.desired_fscores[::-1]),
    )

    # check value
    for key, value in expected:
        assert_allclose(metric[key], value)
def test_fbeta_multilabel_metric(self, device: str):
    """Check the values and the container type of the default, unaveraged scores."""
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    fbeta = FBetaMultiLabelMeasure()
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    expected = (
        ("precision", self.desired_precisions),
        ("recall", self.desired_recalls),
        ("fscore", self.desired_fscores),
    )

    # check value
    for key, value in expected:
        assert_allclose(metric[key], value)

    # check type
    for key, _ in expected:
        assert isinstance(metric[key], List)
def test_fbeta_multilabel_with_macro_average(self, device: str):
    """Macro-averaged scores for labels 0 and 1 must equal the mean of the
    corresponding per-label fixture values.
    """
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    labels = [0, 1]
    fbeta = FBetaMultiLabelMeasure(average="macro", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()

    # The expected values are built on CPU, without a `device` argument:
    # select the per-label fixture entries for `labels` and average them.
    expected = (
        ("precision", torch.tensor(self.desired_precisions)[labels].mean()),
        ("recall", torch.tensor(self.desired_recalls)[labels].mean()),
        ("fscore", torch.tensor(self.desired_fscores)[labels].mean()),
    )

    # check value
    for key, value in expected:
        assert_allclose(metric[key], value)
def multiple_runs(
    global_rank: int,
    world_size: int,
    gpu_id: Union[int, torch.device],
    metric: FBetaMultiLabelMeasure,
    metric_kwargs: Dict[str, List[Any]],
    desired_values: Dict[str, Any],
    exact: Union[bool, Tuple[float, float]] = True,
):
    """Call `metric` 200 times with this rank's arguments, then check its output.

    `metric_kwargs` maps each argument name to a list indexed by rank; the
    entry at `global_rank` is passed to `metric` on every call. Afterwards
    each key of `desired_values` is compared against the matching entry of
    `metric.get_metric()`.

    `world_size`, `gpu_id` and `exact` are accepted but not read by this
    function.
    """
    # Pick out the arguments meant for the process with rank `global_rank`.
    rank_kwargs = {
        name: per_rank_values[global_rank]
        for name, per_rank_values in metric_kwargs.items()
    }

    for _ in range(200):
        metric(**rank_kwargs)

    observed = metric.get_metric()

    for key, wanted in desired_values.items():
        assert_allclose(wanted, observed[key])