示例#1
0
    def test_multi_refs(self):
        """Check that multi-reference scores equal the scores for one chosen reference.

        A single candidate is scored against a list of three references and
        then against the second of those references alone; precision, recall
        and F values from both calls are asserted to be almost equal.
        """
        scorer = bert_score.BERTScorer(lang="en", batch_size=3, rescale_with_baseline=True)

        candidates = ["I like lemons."]
        reference_sets = [["I am proud of you.", "I love lemons.", "Go go go."]]
        # The reference the multi-reference result is expected to agree with.
        closest_reference = reference_sets[0][1]

        multi_p, multi_r, multi_f = scorer.score(candidates, reference_sets)
        best_p, best_r, best_f = scorer.score(candidates, [closest_reference])

        comparisons = ((multi_p, best_p), (multi_r, best_r), (multi_f, best_f))
        for multi_value, best_value in comparisons:
            self.assertTensorsAlmostEqual(multi_value, best_value)
示例#2
0
    def test_idf_scorer(self):
        """Score ``cands`` against ``refs`` with idf weighting and compare to stored values.

        Verifies that the three returned scores are tensors, that the returned
        hash matches the expected string, and that each score is within ``EPS``
        (max absolute difference) of its recorded value.
        """
        scorer = bert_score.BERTScorer(lang="en", idf=True, idf_sents=refs, batch_size=3)

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores

        for score in (precision, recall, f_score):
            self.assertTrue(torch.is_tensor(score))

        expected_hash = f'roberta-large_L17_idf_version={bert_score.__version__}(hug_trans={ht_version})'
        self.assertEqual(hash_code, expected_hash)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.9837872385978699, 0.9754738807678223, 0.8947395086288452]),
            (recall, [0.9827190637588501, 0.9697767496109009, 0.9172918796539307]),
            (f_score, [0.9832529425621033, 0.972616970539093, 0.9058753848075867]),
        )
        for actual, target in recorded:
            largest_gap = (actual - torch.tensor(target)).abs_().max()
            self.assertTrue(largest_gap < EPS)
示例#3
0
    def test_scorer(self):
        """Score ``cands`` against ``refs`` without idf weighting and compare to stored values.

        Verifies that the three returned scores are tensors, that the returned
        hash matches the expected string, and that each score is within ``EPS``
        (max absolute difference) of its recorded value.
        """
        scorer = bert_score.BERTScorer(lang="en", batch_size=3)

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores

        for score in (precision, recall, f_score):
            self.assertTrue(torch.is_tensor(score))

        expected_hash = f'roberta-large_L17_no-idf_version={bert_score.__version__}(hug_trans={ht_version})'
        self.assertEqual(hash_code, expected_hash)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.9843302369117737, 0.9832239747047424, 0.9120386242866516]),
            (recall, [0.9823839068412781, 0.9732863903045654, 0.920428991317749]),
            (f_score, [0.9833561182022095, 0.9782299995422363, 0.916214644908905]),
        )
        for actual, target in recorded:
            largest_gap = (actual - torch.tensor(target)).abs_().max()
            self.assertTrue(largest_gap < EPS)
示例#4
0
    def test_idf_scorer_rescale(self):
        """Check idf-weighted, baseline-rescaled scores and the returned hash.

        Scores ``cands`` against ``refs`` and asserts that the results are
        tensors, that they almost equal the recorded values, and that the hash
        string carries the ``-rescaled`` suffix.
        """
        scorer = bert_score.BERTScorer(
            lang="en", rescale_with_baseline=True, idf=True, idf_sents=refs, batch_size=3
        )

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores
        self.assertAreTensors(precision, recall, f_score)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.903778135776520, 0.854439020156860, 0.375287383794785]),
            (recall, [0.897446095943451, 0.820639789104462, 0.509167850017548]),
            (f_score, [0.900772094726562, 0.837753534317017, 0.442304641008377]),
        )
        for actual, target in recorded:
            self.assertTensorsAlmostEqual(actual, target)

        expected_hash = (
            f"roberta-large_L17_idf_version={bert_score.__version__}(hug_trans={ht_version})-rescaled"
        )
        self.assertEqual(hash_code, expected_hash)
示例#5
0
    def test_scorer_rescale(self):
        """Check baseline-rescaled scores (no idf weighting) and the returned hash.

        Scores ``cands`` against ``refs`` and asserts that the results are
        tensors, that they almost equal the recorded values, and that the hash
        string carries the ``-rescaled`` suffix.
        """
        scorer = bert_score.BERTScorer(lang="en", rescale_with_baseline=True, batch_size=3)

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores
        self.assertAreTensors(precision, recall, f_score)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.907000780105591, 0.900435566902161, 0.477955609560013]),
            (recall, [0.895456790924072, 0.841467440128326, 0.527785062789917]),
            (f_score, [0.901383399963379, 0.871010780334473, 0.503565192222595]),
        )
        for actual, target in recorded:
            self.assertTensorsAlmostEqual(actual, target)

        expected_hash = (
            f"roberta-large_L17_no-idf_version={bert_score.__version__}(hug_trans={ht_version})-rescaled"
        )
        self.assertEqual(hash_code, expected_hash)
示例#6
0
    def test_idf_scorer(self):
        """Check idf-weighted scores (no rescaling) and the returned hash.

        Scores ``cands`` against ``refs`` and asserts that the results are
        tensors, that they almost equal the recorded values, and that the hash
        string matches the expected one.
        """
        scorer = bert_score.BERTScorer(lang="en", idf=True, idf_sents=refs, batch_size=3)

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores
        self.assertAreTensors(precision, recall, f_score)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.9837872385978699, 0.9754738807678223, 0.8947395086288452]),
            (recall, [0.9827190637588501, 0.9697767496109009, 0.9172918796539307]),
            (f_score, [0.9832529425621033, 0.972616970539093, 0.9058753848075867]),
        )
        for actual, target in recorded:
            self.assertTensorsAlmostEqual(actual, target)

        expected_hash = (
            f"roberta-large_L17_idf_version={bert_score.__version__}(hug_trans={ht_version})"
        )
        self.assertEqual(hash_code, expected_hash)
示例#7
0
    def test_scorer(self):
        """Check plain scores (no idf weighting, no rescaling) and the returned hash.

        Scores ``cands`` against ``refs`` and asserts that the results are
        tensors, that they almost equal the recorded values, and that the hash
        string matches the expected one.
        """
        scorer = bert_score.BERTScorer(lang="en", batch_size=3)

        scores, hash_code = scorer.score(cands, refs, return_hash=True)
        precision, recall, f_score = scores
        self.assertAreTensors(precision, recall, f_score)

        # Each score is paired with the values recorded for it.
        recorded = (
            (precision, [0.9843302369117737, 0.9832239747047424, 0.9120386242866516]),
            (recall, [0.9823839068412781, 0.9732863903045654, 0.920428991317749]),
            (f_score, [0.9833561182022095, 0.9782299995422363, 0.916214644908905]),
        )
        for actual, target in recorded:
            self.assertTensorsAlmostEqual(actual, target)

        expected_hash = (
            f"roberta-large_L17_no-idf_version={bert_score.__version__}(hug_trans={ht_version})"
        )
        self.assertEqual(hash_code, expected_hash)
示例#8
0
    def test_multi_refs_working(self):
        """Smoke-test multi-reference scoring with reference lists of differing lengths.

        The fixture includes empty strings on both the candidate and reference
        side. Only the type of the results is asserted, not their values.
        """
        scorer = bert_score.BERTScorer(lang="en", batch_size=3, rescale_with_baseline=True)

        # (candidate, references for that candidate)
        cases = [
            ("I like lemons.", ["I am proud of you.", "I love lemons.", "Go go go."]),
            ("Hi", ["I am proud of you.", "Go go go."]),
            ("Hey", ["Hi", ""]),
            ("Hello", ["I am proud of you.", "I love lemons.", "Go go go.", "hello"]),
            ("Go", ["I am proud of you.", "Go go go.", "Go", "Go to school"]),
            ("", ["test"]),
        ]
        candidates = [candidate for candidate, _ in cases]
        reference_sets = [references for _, references in cases]

        precision, recall, f_score = scorer.score(candidates, reference_sets)
        self.assertAreTensors(precision, recall, f_score)
示例#9
0
    def __init__(self,
                 min_bert_score,
                 model="bert-base-uncased",
                 score_type="f1"):
        """Validate the score threshold and build the underlying BERTScorer.

        Args:
            min_bert_score: Threshold stored on the instance. Must be a
                ``float`` in the closed interval [0.0, 1.0].
            model: Passed to ``bert_score.BERTScorer`` as ``model_type``.
            score_type: Stored as ``self.score_type``; it is not validated
                here (presumably selects which BERTScore component is
                compared -- confirm against the code that reads it).

        Raises:
            TypeError: If ``min_bert_score`` is not a ``float``.
            ValueError: If ``min_bert_score`` is NaN or lies outside
                [0.0, 1.0].
        """
        if not isinstance(min_bert_score, float):
            raise TypeError("min_bert_score must be a float")
        # The chained comparison is False for NaN, so NaN is rejected here
        # instead of slipping through separate "<" / ">" checks.
        if not 0.0 <= min_bert_score <= 1.0:
            raise ValueError(
                "min_bert_score must be a value between 0.0 and 1.0")

        self.min_bert_score = min_bert_score
        self.model = model
        self.score_type = score_type
        # Turn off idf-weighting scheme b/c reference sentence set is small
        self._bert_scorer = bert_score.BERTScorer(model_type=model,
                                                  idf=False,
                                                  device=utils.device)