Example #1
    def test_distributed_covariance(self):
        batch_size = 10
        num_labels = 10
        predictions = torch.randn(batch_size, num_labels)
        labels = 0.5 * predictions + torch.randn(batch_size, num_labels)
        # Random binary mask
        mask = torch.randint(0, 2, size=(batch_size, num_labels)).bool()

        expected_covariance = np.cov(
            predictions.view(-1).cpu().numpy(),
            labels.view(-1).cpu().numpy(),
            fweights=mask.view(-1).cpu().numpy(),
        )[0, 1]

        predictions = [predictions[:5], predictions[5:]]
        labels = [labels[:5], labels[5:]]
        mask = [mask[:5], mask[5:]]

        metric_kwargs = {
            "predictions": predictions,
            "gold_labels": labels,
            "mask": mask
        }
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            Covariance(),
            metric_kwargs,
            expected_covariance,
            exact=(0.0001, 1e-01),
        )
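Every example in this gallery funnels through run_distributed_test, which spawns one worker per entry in the device list ([-1, -1] meaning two CPU processes) and calls the given function on each worker. As a minimal sketch of what a helper like global_distributed_metric plausibly does, assuming each metric kwarg is a per-worker list indexed by rank (the signature and comparison logic here are my assumptions, not the actual AllenNLP helper):

    # Hypothetical sketch: each worker consumes only its rank's shard of every
    # kwarg; the metric's own all-reduce handles the cross-worker aggregation.
    def global_distributed_metric_sketch(
        global_rank, world_size, gpu_id, metric, metric_kwargs, desired_values, exact=True
    ):
        kwargs = {name: shards[global_rank] for name, shards in metric_kwargs.items()}
        metric(**kwargs)
        # ... then compare metric.get_metric() against desired_values, either
        # exactly or within the tolerance encoded by `exact`.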
Example #2
    def test_distributed_metric_values(self):
        top_spans = torch.tensor([[[0, 1], [4, 6], [8, 9]]])
        antecedent_indices = torch.tensor([[[-1, -1, -1], [0, -1, -1],
                                            [0, 1, -1]]])
        predicted_antecedents = torch.tensor([[-1, -1, 1]])

        metadata_list = [
            [{"clusters": [((4, 6), (8, 9))]}],
            [{"clusters": [((0, 1), (4, 6))]}],
        ]

        metric_kwargs = {
            "top_spans": [top_spans, top_spans],
            "antecedent_indices": [antecedent_indices, antecedent_indices],
            "predicted_antecedents":
            [predicted_antecedents, predicted_antecedents],
            "metadata_list": metadata_list,
        }

        desired_values = (0.625, 0.625, 0.625)

        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            ConllCorefScores(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
Example #3
    def test_distributed_auc_unequal_batches(self):
        predictions = torch.randn(8)
        labels = torch.randint(3, 5, (8,), dtype=torch.long)
        # We make sure that the positive label is always present.
        labels[0] = 4
        labels[4] = 4

        false_positive_rates, true_positive_rates, _ = metrics.roc_curve(
            labels.cpu().numpy(), predictions.cpu().numpy(), pos_label=4
        )

        predictions = [predictions[:2], predictions[2:]]
        labels = [labels[:2], labels[2:]]

        metric_kwargs = {"predictions": predictions, "gold_labels": labels}
        desired_auc = metrics.auc(false_positive_rates, true_positive_rates)
        with pytest.raises(Exception):
            run_distributed_test(
                [-1, -1],
                global_distributed_metric,
                Auc(positive_label=4),
                metric_kwargs,
                desired_auc,
                exact=False,
            )
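The pytest.raises wrapper is the point of this example: the two workers receive shards of different lengths (2 and 6), and Auc's distributed aggregation apparently requires equal batch sizes across workers, so the run is expected to fail. For contrast, a sketch (my assumption, mirroring the other tests) of an equal split over the original 8-element tensors that would not trip this check:

    # Four elements per worker; with equal shard sizes the cross-worker
    # gather inside Auc lines up and no exception is expected.
    predictions = [predictions[:4], predictions[4:]]
    labels = [labels[:4], labels[4:]]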
Example #4
    def test_multiple_distributed_runs(self):
        predictions = [
            torch.tensor([[1, 0, 0], [1, 1, 0]]),
            torch.tensor([[1, 1, 1]]),
        ]
        gold_targets = [
            torch.tensor([[2, 0, 0], [1, 0, 0]]),
            torch.tensor([[1, 1, 2]]),
        ]

        check = math.exp(0.5 * (math.log(3) - math.log(6)) + 0.5 *
                         (math.log(1) - math.log(3)))
        metric_kwargs = {
            "predictions": predictions,
            "gold_targets": gold_targets
        }
        desired_values = {"BLEU": check}
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            BLEU(ngram_weights=(0.5, 0.5), exclude_indices={0}),
            metric_kwargs,
            desired_values,
            exact=False,
        )
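The check value is the 2-gram BLEU formula written out by hand. With index 0 excluded, the combined predictions match 3 of 6 unigrams and 1 of 3 bigrams against the references (my tally of the tensors above), so the equally weighted geometric mean reduces to sqrt(1/6) ≈ 0.408:

    import math

    # exp(0.5 * ln(3/6) + 0.5 * ln(1/3)) is the geometric mean of the two
    # modified n-gram precisions, i.e. sqrt((3/6) * (1/3)) = sqrt(1/6).
    check = math.exp(0.5 * (math.log(3) - math.log(6)) + 0.5 * (math.log(1) - math.log(3)))
    assert math.isclose(check, math.sqrt(1 / 6))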
Example #5
    def test_distributed_accuracy(self):
        gold = torch.tensor([[2, 4, 8], [1, 2, 3], [7, 1, 1], [11, 14, 17]])
        predictions = torch.tensor([
            [[2, 4, 8], [2, 5, 9]],  # 3/3
            [[-1, 2, 4], [3, 8, -1]],  # 2/2
            [[-1, -1, -1], [7, 2, -1]],  # 1/2
            [[12, 13, 17], [11, 13, 18]],  # 2/2
        ])
        mask = torch.tensor([[True, True, True], [False, True, True],
                             [True, True, False], [True, False, True]])
        gold = [gold[:2], gold[2:]]
        predictions = [predictions[:2], predictions[2:]]
        mask = [mask[:2], mask[2:]]

        metric_kwargs = {
            "predictions": predictions,
            "gold_labels": gold,
            "mask": mask
        }
        desired_values = {"unigram_recall": 7 / 8}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            UnigramRecall(),
            metric_kwargs,
            desired_values,
            exact=False,
        )
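The desired 7/8 follows from the inline per-row comments, assuming UnigramRecall averages, over rows, the fraction of unmasked gold tokens that appear in any of the candidate sequences:

    # Per-row recalls: 3/3, 2/2, 1/2, 2/2 -> mean of (1, 1, 0.5, 1).
    assert (1.0 + 1.0 + 0.5 + 1.0) / 4 == 7 / 8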
Example #6
    def test_multiple_distributed_runs(self):
        predictions = [
            torch.tensor(
                [
                    [0.55, 0.25, 0.10, 0.10, 0.20],
                    [0.10, 0.60, 0.10, 0.95, 0.00],
                    [0.90, 0.80, 0.75, 0.80, 0.00],
                ]
            ),
            torch.tensor(
                [
                    [0.49, 0.50, 0.95, 0.55, 0.00],
                    [0.60, 0.49, 0.60, 0.65, 0.85],
                    [0.85, 0.40, 0.10, 0.20, 0.00],
                ]
            ),
        ]
        targets = [
            torch.tensor([[1, 1, 0, 0, 0], [0, 1, 0, 1, 0], [1, 1, 0, 1, 0]]),
            torch.tensor([[1, 1, 1, 1, 0], [1, 1, 1, 1, 0], [0, 0, 0, 0, 0]]),
        ]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_metrics = {
            "precision": self.desired_precisions,
            "recall": self.desired_recalls,
            "fscore": self.desired_fscores,
        }
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            FBetaMultiLabelMeasure(),
            metric_kwargs,
            desired_metrics,
            exact=False,
        )
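Several of these tests pass multiple_runs rather than global_distributed_metric. A plausible reading (an assumption on my part, not the actual helper) is that it feeds the same per-rank shard through the metric several times, verifying that repeated accumulation and get_metric calls stay consistent under distributed aggregation:

    # Hypothetical sketch of a multiple-runs driver: update with this
    # worker's shard a few times, then check the aggregated result.
    def multiple_runs_sketch(
        global_rank, world_size, gpu_id, metric, metric_kwargs, desired_values, exact=True
    ):
        kwargs = {name: shards[global_rank] for name, shards in metric_kwargs.items()}
        for _ in range(3):
            metric(**kwargs)
        # ... compare metric.get_metric(reset=True) with desired_values.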
Example #7
    def test_multiple_distributed_runs(self):
        predictions = [torch.Tensor([[0, 1, 3, 5, 2, 4]]), torch.Tensor([[0, 3, 2, 1, 0, 0]])]

        gold_indices = [torch.Tensor([[0, 1, 3, 5, 2, 4]]), torch.Tensor([[0, 3, 2, 1, 0, 0]])]

        label_predictions = [
            torch.Tensor([[0, 5, 2, 3, 3, 3]]),
            torch.Tensor([[7, 4, 8, 2, 0, 0]]),
        ]

        gold_labels = [torch.Tensor([[0, 5, 2, 1, 4, 2]]), torch.Tensor([[0, 4, 8, 2, 0, 0]])]

        mask = [
            torch.tensor([[True, True, True, True, True, True]]),
            torch.tensor([[True, True, True, True, False, False]]),
        ]

        metric_kwargs = {
            "predicted_indices": predictions,
            "gold_indices": gold_indices,
            "predicted_labels": label_predictions,
            "gold_labels": gold_labels,
            "mask": mask,
        }

        desired_metrics = {
            "UAS": 1.0,
            "LAS": 0.6,
            "UEM": 1.0,
            "LEM": 0.0,
        }
        run_distributed_test(
            [-1, -1], multiple_runs, AttachmentScores(), metric_kwargs, desired_metrics, exact=True,
        )
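The desired values follow from the ten unmasked tokens: every predicted head index agrees with gold (UAS 1.0, and both sentences are unlabeled exact matches, UEM 1.0), while only six of the ten labels agree and each sentence contains at least one label error (LAS 0.6, LEM 0.0). Checking the label count:

    # Sentence 1: labels agree at positions 0, 1, 2 -> 3 of 6.
    # Sentence 2 (last two positions masked): labels agree at 1, 2, 3 -> 3 of 4.
    assert (3 + 3) / (6 + 4) == 0.6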
Example #8
    def test_distributed_sequence_accuracy(self):
        gold = torch.tensor([[1, 2, 3], [2, 4, 8], [0, 1, 1], [11, 13, 17]])
        predictions = torch.tensor([
            [[1, 2, 3], [1, 2, -1]],
            [[2, 4, 8], [2, 5, 9]],
            [[-1, -1, -1], [0, 1, -1]],
            [[12, 13, 17], [11, 13, 18]],
        ])
        mask = torch.tensor([[False, True, True], [True, True, True],
                             [True, True, False], [True, False, True]])
        gold = [gold[:2], gold[2:]]
        predictions = [predictions[:2], predictions[2:]]
        mask = [mask[:2], mask[2:]]

        metric_kwargs = {
            "predictions": predictions,
            "gold_labels": gold,
            "mask": mask
        }
        desired_values = {"accuracy": 3 / 4}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            SequenceAccuracy(),
            metric_kwargs,
            desired_values,
            exact=False,
        )
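A row counts as correct when any of its candidate sequences matches gold on the unmasked positions. Rows 1-3 each contain such a candidate; row 4 does not, which is where the expected 3/4 comes from:

    import torch

    gold = torch.tensor([11, 13, 17])
    mask = torch.tensor([True, False, True])
    candidates = torch.tensor([[12, 13, 17], [11, 13, 18]])
    # Neither candidate agrees with gold on both unmasked positions.
    assert not any(torch.equal(c[mask], gold[mask]) for c in candidates)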
Example #9
    def test_distributed_npmixy_masked_computation(self):
        Y = torch.ones(3, 3).long()
        X = torch.eye(3).long()
        mask = torch.ones_like(Y).bool()

        expected_ova_npmixy_gaps = {
            0: [np.nan, 0.0],
            1: [np.nan, 0.0],
        }
        metric_kwargs = {"predicted_labels": Y, "protected_variable_labels": X, "mask": mask}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            AssociationWithoutGroundTruth(2, 2, "npmixy", "ova"),
            metric_kwargs,
            expected_ova_npmixy_gaps,
            exact=True,
        )

        expected_pairwise_npmixy_gaps = {
            0: {0: [np.nan, 0.0], 1: [np.nan, 0.0]},
            1: {0: [np.nan, 0.0], 1: [np.nan, 0.0]},
        }
        metric_kwargs = {"predicted_labels": Y, "protected_variable_labels": X, "mask": mask}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            AssociationWithoutGroundTruth(2, 2, "npmixy", "pairwise"),
            metric_kwargs,
            expected_pairwise_npmixy_gaps,
            exact=True,
        )
Example #10
    def test_multiple_distributed_runs(self):
        predictions = [torch.tensor([[0, 1], [2, 3]]), torch.tensor([[4, 5], [6, 7]])]
        targets = [torch.tensor([[0, 1], [2, 2]]), torch.tensor([[4, 5], [7, 7]])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_values = 0.5
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            BooleanAccuracy(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
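BooleanAccuracy only credits a row when every element matches, so on each worker exactly the first row counts and the aggregate is 2/4 = 0.5:

    import torch

    pred = torch.tensor([[0, 1], [2, 3]])
    gold = torch.tensor([[0, 1], [2, 2]])
    # Row-level exact matches on the first shard: [True, False].
    assert (pred == gold).all(dim=1).tolist() == [True, False]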
Example #11
    def test_distributed_entropy(self):
        logits = torch.tensor([[1, 1, 1, 1], [1, 1, 1, 1]], dtype=torch.float)
        logits = [logits[0], logits[1]]
        metric_kwargs = {"logits": logits}
        desired_values = {"entropy": 1.38629436}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            Entropy(),
            metric_kwargs,
            desired_values,
            exact=False,
        )
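With identical logits the predicted distribution is uniform over four classes, so the expected entropy is ln 4 ≈ 1.38629436:

    import math

    # H = -sum(p * ln p) with p = 1/4 for each of four classes.
    assert math.isclose(math.log(4), 1.38629436, rel_tol=1e-8)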
Example #12
    def test_distributed_average(self, device: str):
        device_ids = [-1, -1] if device == "cpu" else [0, 1]
        metric_kwargs = {
            "value": [1.0, 2.0],
        }
        run_distributed_test(
            device_ids,
            global_distributed_metric,
            self.metric,
            metric_kwargs,
            1.5,
            exact=True,
        )
Example #13
    def test_distributed_accuracy_unequal_batches(self):
        predictions = [torch.tensor([[0, 1], [2, 3], [4, 5]]), torch.tensor([[6, 7]])]
        targets = [torch.tensor([[0, 1], [2, 2], [4, 5]]), torch.tensor([[7, 7]])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_values = 0.5
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            BooleanAccuracy(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
Example #14
    def test_distributed_independence_masked_computation(self):
        A = torch.eye(3).long()
        C = 2 * A
        mask = torch.ones_like(C).bool()

        expected_kl_divs = {0: 0.4055, 1: 1.0986}
        metric_kwargs = {"predicted_labels": C, "protected_variable_labels": A, "mask": mask}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            Independence(4, 2),
            metric_kwargs,
            expected_kl_divs,
            exact=False,
        )
Example #15
    def test_multiple_distributed_runs(self):
        predictions = [
            torch.tensor([[0.35, 0.25, 0.1, 0.1, 0.2]]),
            torch.tensor([[0.1, 0.6, 0.1, 0.2, 0.0]]),
        ]
        targets = [torch.tensor([0]), torch.tensor([3])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_accuracy = 0.5
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            CategoricalAccuracy(),
            metric_kwargs,
            desired_accuracy,
            exact=True,
        )
Example #16
    def test_distributed_spearman(self):
        batch_size = 10
        num_labels = 10
        predictions = torch.randn(batch_size, num_labels)
        labels = 0.5 * predictions + torch.randn(batch_size, num_labels)
        desired_spearman = spearman_formula(predictions.reshape(-1), labels.reshape(-1))
        predictions = [predictions[:5], predictions[5:]]
        labels = [labels[:5], labels[5:]]
        metric_kwargs = {"predictions": predictions, "gold_labels": labels}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            SpearmanCorrelation(),
            metric_kwargs,
            desired_spearman,
            exact=False,
        )
Example #17
    def test_distributed_accuracy_unequal_batches(self):
        predictions = [
            torch.tensor([[0.35, 0.25, 0.1, 0.1, 0.2], [0.1, 0.6, 0.1, 0.2, 0.0]]),
            torch.tensor([[0.1, 0.2, 0.5, 0.2, 0.0]]),
        ]
        targets = [torch.tensor([0, 3]), torch.tensor([0])]
        mask = [torch.tensor([False, True]), torch.tensor([True])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets, "mask": mask}
        desired_accuracy = 0.5
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            CategoricalAccuracy(top_k=2),
            metric_kwargs,
            desired_accuracy,
            exact=False,
        )
Example #18
    def test_distributed_nli(self):
        nli_probabilities = 0.6 * torch.eye(3)
        expected_scores = {
            "net_neutral": 0.6 / 3,
            "fraction_neutral": 1 / 3,
            "threshold_0.5": 1 / 3,
            "threshold_0.7": 0.0,
        }
        metric_kwargs = {"nli_probabilities": [nli_probabilities, nli_probabilities]}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            NaturalLanguageInference(0),
            metric_kwargs,
            expected_scores,
            exact=False,
        )
Example #19
    def test_multiple_distributed_runs(self):
        predictions = [
            torch.tensor([[1.0, 1.5, 1.0], [2.0, 3.0, 3.5]]),
            torch.tensor([[4.0, 5.0, 5.5], [6.0, 7.0, 7.5]]),
        ]
        targets = [
            torch.tensor([[0.0, 1.0, 0.0], [2.0, 2.0, 0.0]]),
            torch.tensor([[4.0, 5.0, 0.0], [7.0, 7.0, 0.0]]),
        ]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_values = {"mae": 21.0 / 12.0}
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            MeanAbsoluteError(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
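The expected MAE is the total absolute error over all twelve elements; the four rows contribute 2.5, 4.5, 5.5, and 8.5 respectively, i.e. 21 in total:

    import torch

    pred = torch.tensor([[1.0, 1.5, 1.0], [2.0, 3.0, 3.5], [4.0, 5.0, 5.5], [6.0, 7.0, 7.5]])
    gold = torch.tensor([[0.0, 1.0, 0.0], [2.0, 2.0, 0.0], [4.0, 5.0, 0.0], [7.0, 7.0, 0.0]])
    assert (pred - gold).abs().sum().item() == 21.0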
Example #20
    def test_multiple_distributed_runs(self):
        predictions = [
            torch.tensor(
                [[0.35, 0.25, 0.1, 0.1, 0.2], [0.1, 0.6, 0.1, 0.2, 0.0], [0.1, 0.6, 0.1, 0.2, 0.0]]
            ),
            torch.tensor(
                [[0.1, 0.5, 0.1, 0.2, 0.0], [0.1, 0.2, 0.1, 0.7, 0.0], [0.1, 0.6, 0.1, 0.2, 0.0]]
            ),
        ]
        targets = [torch.tensor([0, 4, 1]), torch.tensor([0, 3, 0])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_metrics = {
            "precision": self.desired_precisions,
            "recall": self.desired_recalls,
            "fscore": self.desired_fscores,
        }
        run_distributed_test(
            [-1, -1],
            multiple_runs,
            FBetaMeasure(),
            metric_kwargs,
            desired_metrics,
            exact=False,
        )
Example #21
    def test_distributed_setting_throws_an_error(self):
        from allennlp_models.structured_prediction.models.srl import (
            convert_bio_tags_to_conll_format,
        )

        batch_verb_indices = [2]
        batch_sentences = [["The", "cat", "loves", "hats", "."]]
        batch_bio_predicted_tags = [["B-ARG0", "B-ARG1", "B-V", "B-ARG1", "O"]]
        batch_conll_predicted_tags = [
            convert_bio_tags_to_conll_format(tags)
            for tags in batch_bio_predicted_tags
        ]
        batch_bio_gold_tags = [["B-ARG0", "I-ARG0", "B-V", "B-ARG1", "O"]]
        batch_conll_gold_tags = [
            convert_bio_tags_to_conll_format(tags)
            for tags in batch_bio_gold_tags
        ]

        metric_kwargs = {
            "batch_verb_indices": [batch_verb_indices, batch_verb_indices],
            "batch_sentences": [batch_sentences, batch_sentences],
            "batch_conll_formatted_predicted_tags": [
                batch_conll_predicted_tags,
                batch_conll_predicted_tags,
            ],
            "batch_conll_formatted_gold_tags": [batch_conll_gold_tags, batch_conll_gold_tags],
        }

        desired_values = {}  # The values do not matter; we expect the run to fail.

        with pytest.raises(Exception) as exc:
            run_distributed_test(
                [-1, -1],
                global_distributed_metric,
                SrlEvalScorer(ignore_classes=["V"]),
                metric_kwargs,
                desired_values,
                exact=True,
            )
        # The assertion must live outside the `with` block: code placed after
        # the raising call inside the block would never execute.
        assert (
            "RuntimeError: Distributed aggregation for `SrlEvalScorer` is currently not supported."
            in str(exc.value)
        )
Example #22
    def test_distributed_squad_em_and_f1(self):
        best_span_string = ["this is the best span", "this is another span"]
        answer_strings = [
            ["this is a good span", "something irrelevant"],
            ["this is another span", "this one is less perfect"],
        ]

        metric_kwargs = {
            "best_span_string": best_span_string,
            "answer_strings": answer_strings
        }
        desired_values = (1 / 2, 1.75 / 2)
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            SquadEmAndF1(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
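The expected F1 of 1.75/2 depends on SQuAD's answer normalization, which drops articles before taking the bag-of-tokens overlap: the first pair reduces to 3 shared tokens out of 4 on each side (F1 = 0.75), and the second prediction matches exactly (F1 = 1.0). A hand-rolled check of the first pair (the normalization here is simplified for illustration):

    # SQuAD normalization removes articles before the token-level F1.
    pred = [t for t in "this is the best span".split() if t not in {"a", "an", "the"}]
    gold = [t for t in "this is a good span".split() if t not in {"a", "an", "the"}]
    common = len(set(pred) & set(gold))  # {"this", "is", "span"}
    precision, recall = common / len(pred), common / len(gold)
    assert 2 * precision * recall / (precision + recall) == 0.75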
Example #23
    def test_distributed_accuracy(self):
        logits = [
            torch.tensor([[0.35, 0.25, 0.1, 0.1, 0.2]]),
            torch.tensor([[0.1, 0.6, 0.1, 0.2, 0.0]]),
        ]
        labels = [torch.tensor([[0]]), torch.tensor([[3]])]
        label_weights = [torch.tensor([[1 / 3]]), torch.tensor([[2 / 3]])]
        metric_kwargs = {
            "logits": logits,
            "labels": labels,
            "label_weights": label_weights,
        }
        desired_accuracy = {"score": (1 / 3) / 2}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            VqaMeasure(),
            metric_kwargs,
            desired_accuracy,
            exact=False,
        )
Example #24
    def test_distributed_sufficiency_masked_computation(self):
        C = torch.zeros(3, 3).long()
        Y = torch.eye(3).long()
        A = Y
        mask = torch.ones_like(C).bool()

        expected_kl_divs = {0: {0: 0.4055, 1: 1.0986}, 1: {0: np.nan, 1: np.nan}}
        metric_kwargs = {
            "predicted_labels": C,
            "gold_labels": Y,
            "protected_variable_labels": A,
            "mask": mask,
        }
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            Sufficiency(2, 2),
            metric_kwargs,
            expected_kl_divs,
            exact=False,
        )
Example #25
    def test_distributed_pearson(self):
        batch_size = 10
        num_labels = 10
        predictions = torch.randn(batch_size, num_labels)
        labels = 0.5 * predictions + torch.randn(batch_size, num_labels)

        expected_pearson_correlation = pearson_corrcoef(
            predictions.view(-1).cpu().numpy(),
            labels.view(-1).cpu().numpy(),
        )
        predictions = [predictions[:5], predictions[5:]]
        labels = [labels[:5], labels[5:]]
        metric_kwargs = {"predictions": predictions, "gold_labels": labels}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            PearsonCorrelation(),
            metric_kwargs,
            expected_pearson_correlation,
            exact=(0.0001, 1e-01),
        )
Example #26
    def test_distributed_fbeta_measure(self):
        predictions = [
            torch.tensor([[0.35, 0.25, 0.1, 0.1, 0.2], [0.1, 0.6, 0.1, 0.2, 0.0],
                          [0.1, 0.6, 0.1, 0.2, 0.0]]),
            torch.tensor([[0.1, 0.5, 0.1, 0.2, 0.0], [0.1, 0.2, 0.1, 0.7, 0.0],
                          [0.1, 0.6, 0.1, 0.2, 0.0]]),
        ]
        targets = [torch.tensor([0, 4, 1]), torch.tensor([0, 3, 0])]
        metric_kwargs = {"predictions": predictions, "gold_labels": targets}
        desired_metrics = {
            "precision": 1.0,
            "recall": 0.333333333,
            "f1": 0.499999999,
        }
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            F1Measure(positive_label=0),
            metric_kwargs,
            desired_metrics,
            exact=False,
        )
Example #27
    def test_distributed_loading_and_training(self, mixed_precision,
                                              flatten_parameters):
        run_distributed_test(
            [0, 1],
            func=_dist_load_and_train,
            test_dir=self.TEST_DIR,
            mixed_precision=mixed_precision,
            flatten_parameters=flatten_parameters,
        )

        # Now make sure the sharded saved state, once consolidated, is exactly
        # the same as the original state.
        original_state = torch.load(self.TEST_DIR / "state.pt", map_location="cpu")
        consolidated_state = FairScaleFsdpWrappedModel.consolidate_sharded_state(
            [
                self.TEST_DIR / "state_worker0.pt",
                self.TEST_DIR / "state_worker1.pt",
            ]
        )

        assert set(original_state.keys()) - set(consolidated_state.keys()) == {
            # Won't be in the state dict since this param is tied to embedding.weight.
            "decoder.linear.weight"
        }

        for key, tensor0 in original_state.items():
            if key not in consolidated_state:
                continue
            # Buffers need extra tolerance when `mixed_precision` is `True`.
            tolerance = None if not mixed_precision or "buffer" not in key else 1e-3
            tensor1 = consolidated_state[key]
            assert_allclose(
                tensor0,
                tensor1,
                msg=f"{key} is off in consolidated state.\nExpected:\n{tensor0}\nGot:\n{tensor1}",
                atol=tolerance,
                rtol=tolerance,
            )
Example #28
    def test_distributed_evalb(self):
        tree1 = Tree.fromstring(
            "(S (VP (D the) (NP dog)) (VP (V chased) (NP (D the) (N cat))))")
        tree2 = Tree.fromstring(
            "(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        predicted_trees = [[tree1], [tree2]]
        gold_trees = [[tree2], [tree2]]
        metric_kwargs = {
            "predicted_trees": predicted_trees,
            "gold_trees": gold_trees,
        }
        desired_values = {
            "evalb_recall": 0.875,
            "evalb_precision": 0.875,
            "evalb_f1_measure": 0.875,
        }
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            EvalbBracketingScorer(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
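The 0.875 is bracket arithmetic: EVALB scores labeled constituents above the preterminal level, each tree here contributes four, and the only disagreement is tree1's first constituent, which covers the same words as tree2's NP but is labeled VP, so 7 of 8 gold brackets are recovered. A rough recount with nltk (my approximation of EVALB's bracketing, ignoring preterminals):

    from nltk import Tree

    tree1 = Tree.fromstring("(S (VP (D the) (NP dog)) (VP (V chased) (NP (D the) (N cat))))")
    tree2 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")

    def brackets(tree):
        # Labeled constituents above the preterminal level, keyed by their words.
        return {(t.label(), tuple(t.leaves())) for t in tree.subtrees() if t.height() > 2}

    # Pair 1 matches 3 of 4 brackets, pair 2 matches 4 of 4.
    matched = len(brackets(tree1) & brackets(tree2)) + len(brackets(tree2))
    assert matched / 8 == 0.875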
Example #29
    def test_distributed_mention_recall(self):
        batched_top_spans = [
            torch.tensor([[[2, 4], [1, 3]]]),
            torch.tensor([[[5, 6], [7, 8]]]),
        ]
        batched_metadata = [
            [{"clusters": [[(2, 4), (3, 5)]]}],
            [{"clusters": [[(5, 6), (7, 8)]]}],
        ]

        metric_kwargs = {
            "batched_top_spans": batched_top_spans,
            "batched_metadata": batched_metadata,
        }
        desired_values = 0.75
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            MentionRecall(),
            metric_kwargs,
            desired_values,
            exact=True,
        )
Example #30
    def test_distributed_drop_em_and_f1(self):
        prediction = ["this is the best span", "this is another span"]
        ground_truths = [
            [{"spans": ["this is a good span", "something irrelevant"]}],
            [{"spans": ["this is another span"]}],
        ]

        metric_kwargs = {
            "prediction": prediction,
            "ground_truths": ground_truths
        }
        desired_values = (1 / 2, 1.38 / 2)
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            DropEmAndF1(),
            metric_kwargs,
            desired_values,
            exact=True,
        )