Exemplo n.º 1
0
    def _test(metric_device):
        """Attach a micro-averaged Bleu metric to an Engine and check its value.

        The engine is driven over ``range(size)`` for one epoch using the
        enclosing scope's ``update`` function, and the resulting metric is
        compared against ``corpus_bleu`` evaluated over the first
        candidate/reference entry of every item in the enclosing ``data``.

        NOTE(review): ``metric_device`` is accepted but never used in this
        body — confirm whether it was meant to be forwarded to ``Bleu``.
        """
        trainer = Engine(update)
        metric = Bleu(ngram=4, smooth="smooth2", average="micro")
        metric.attach(trainer, "bleu")

        trainer.run(data=list(range(size)), max_epochs=1)

        assert "bleu" in trainer.state.metrics

        # Collect the first reference set / candidate of each data item in a
        # single pass over ``data``.
        first_references, first_candidates = [], []
        for item_candidates, item_references in data:
            first_references.append(item_references[0])
            first_candidates.append(item_candidates[0])

        # Silence any warnings emitted by the reference implementation.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            expected = corpus_bleu(
                first_references,
                first_candidates,
                weights=[0.25, 0.25, 0.25, 0.25],
                smoothing_function=SmoothingFunction().method2,
            )

        assert pytest.approx(trainer.state.metrics["bleu"]) == expected
Exemplo n.º 2
0
def test_bleu_batch_macro():
    """Check the default-average Bleu on a batch and on per-sample updates.

    First a single batched ``update`` is compared with the mean of
    ``sentence_bleu`` over the same three samples.  Then the metric is reset
    and fed the same samples one by one; both the manually averaged
    ``_sentence_bleu`` values and the accumulated ``compute()`` result must
    match the same reference score.
    """
    bleu = Bleu(ngram=4)

    # Batch size 3
    hypotheses = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    refs = [corpus.references_1, corpus.references_2, corpus.references_2]
    bleu.update((hypotheses, refs))

    # Silence any warnings emitted by the reference implementation.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        reference_bleu_score = (sentence_bleu(refs[0], hypotheses[0]) +
                                sentence_bleu(refs[1], hypotheses[1]) +
                                sentence_bleu(refs[2], hypotheses[2])) / 3
    assert pytest.approx(bleu.compute()) == reference_bleu_score

    # Drop the state accumulated by the batched update above so that the
    # per-sample phase is verified on its own rather than on top of the
    # previous three samples.
    bleu.reset()

    value = 0
    for _hypotheses, _refs in zip(hypotheses, refs):
        value += bleu._sentence_bleu(_refs, _hypotheses)
        bleu.update(([_hypotheses], [_refs]))

    ref_1 = value / len(refs)
    ref_2 = bleu.compute()

    assert pytest.approx(ref_1) == reference_bleu_score
    assert pytest.approx(ref_2) == reference_bleu_score
Exemplo n.º 3
0
def test_n_gram_counter(candidates, references):
    """Check the lengths returned by ``Bleu._n_gram_counter``.

    The hypothesis length must equal ``len(candidates)`` and the reference
    length must be the length of the reference closest in size to the
    candidate, with ties resolved in favour of the shorter reference.
    """
    metric = Bleu(ngram=4)
    lengths = metric._n_gram_counter([references], [candidates], Counter(),
                                     Counter())
    hyp_length, ref_length = lengths

    candidate_len = len(candidates)
    assert hyp_length == candidate_len

    def _distance_then_length(ref_len):
        # Primary key: distance to the candidate length; secondary: length.
        return abs(ref_len - candidate_len), ref_len

    expected_ref_len = min(map(len, references), key=_distance_then_length)

    assert ref_length == expected_ref_len
Exemplo n.º 4
0
def test_accumulation_micro_bleu():
    """Check that micro-averaged updates accumulate to the corpus-level score.

    Four single-sample updates are applied, after which ``compute()`` must
    equal ``_corpus_bleu`` evaluated on the same four samples at once.
    """
    metric = Bleu(ngram=4, smooth="smooth2", average="micro")

    # (candidate, references) pairs, in the order they are fed to the metric.
    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    for candidate, reference_set in samples:
        metric.update(([candidate], [reference_set]))

    all_references = [reference_set for _, reference_set in samples]
    all_candidates = [candidate for candidate, _ in samples]
    expected = metric._corpus_bleu(all_references, all_candidates)

    assert metric.compute() == expected
Exemplo n.º 5
0
def test_wrong_inputs():
    """Check that invalid Bleu arguments and premature use raise errors."""
    # An n-gram order of zero must be refused by the constructor.
    zero_order_msg = r"ngram order must be greater than zero"
    with pytest.raises(ValueError, match=zero_order_msg):
        Bleu(ngram=0)

    # An unknown smoothing name must be refused by the constructor.
    bad_smooth_msg = r"Smooth is not valid"
    with pytest.raises(ValueError, match=bad_smooth_msg):
        Bleu(smooth="fake")

    # Two reference lists for a single candidate is a size mismatch.
    size_mismatch_msg = (
        r"nb of candidates should be equal to nb of reference lists")
    with pytest.raises(ValueError, match=size_mismatch_msg):
        Bleu()._corpus_bleu(references=[[0], [0]], candidates=[[0]])

    # Calling compute() on a freshly built metric must fail.
    with pytest.raises(NotComputableError):
        Bleu().compute()
Exemplo n.º 6
0
def _test(candidates,
          references,
          average,
          smooth="no_smooth",
          smooth_nltk_fn=None,
          ngram_range=8):
    """Compare Bleu with the reference implementation for several n-gram orders.

    For every order in ``range(1, ngram_range)`` a uniform weight tuple is
    built and a fresh ``Bleu`` is created.  With ``average == "macro"`` the
    first sample is scored with ``sentence_bleu`` and compared against
    ``_sentence_bleu``; with ``average == "micro"`` the whole input is scored
    with ``corpus_bleu`` and compared against ``_corpus_bleu``.  In both
    cases the value from ``update`` followed by ``compute`` must match the
    same reference score.

    Args:
        candidates: candidate sequences passed to the metric.
        references: reference sets matching ``candidates``.
        average: ``"macro"`` or ``"micro"``.
        smooth: smoothing name forwarded to ``Bleu``.
        smooth_nltk_fn: smoothing function forwarded to the reference scorer.
        ngram_range: exclusive upper bound on the n-gram orders tested.
    """
    for order in range(1, ngram_range):
        uniform_weights = (1 / order,) * order
        metric = Bleu(ngram=order, average=average, smooth=smooth)

        if average == "macro":
            # Silence any warnings emitted by the reference implementation.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                reference = sentence_bleu(references[0],
                                          candidates[0],
                                          weights=uniform_weights,
                                          smoothing_function=smooth_nltk_fn)
            sentence_score = metric._sentence_bleu(references[0],
                                                   candidates[0])
            assert pytest.approx(reference) == sentence_score

        elif average == "micro":
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                reference = corpus_bleu(references,
                                        candidates,
                                        weights=uniform_weights,
                                        smoothing_function=smooth_nltk_fn)
            corpus_score = metric._corpus_bleu(references, candidates)
            assert pytest.approx(reference) == corpus_score

        # The public update/compute path must agree with the same reference.
        metric.update((candidates, references))
        accumulated = metric.compute()
        assert pytest.approx(reference) == accumulated
Exemplo n.º 7
0
    def _test(metric_device):
        """Attach a default-average Bleu metric to an Engine and check its value.

        The engine is driven over ``range(size)`` for one epoch using the
        enclosing scope's ``update`` function.  The resulting metric is
        compared against the mean of ``sentence_bleu`` over the first
        candidate/reference entry of every item in the enclosing ``data``.

        NOTE(review): ``metric_device`` is accepted but never used in this
        body — confirm whether it was meant to be forwarded to ``Bleu``.
        """
        trainer = Engine(update)
        metric = Bleu(ngram=4, smooth="smooth2")
        metric.attach(trainer, "bleu")

        trainer.run(data=list(range(size)), max_epochs=1)

        assert "bleu" in trainer.state.metrics

        # Silence any warnings emitted by the reference implementation while
        # summing the per-item sentence scores.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            total_score = sum(
                sentence_bleu(
                    item_references[0],
                    item_candidates[0],
                    weights=[0.25, 0.25, 0.25, 0.25],
                    smoothing_function=SmoothingFunction().method2,
                )
                for item_candidates, item_references in data
            )

        expected = total_score / len(data)
        assert pytest.approx(trainer.state.metrics["bleu"]) == expected
Exemplo n.º 8
0
def test_corpus_bleu(candidate, references):
    """Compare ``Bleu`` with ``corpus_bleu`` for n-gram orders 1 through 7.

    For each order, uniform weights are passed to the reference
    ``corpus_bleu`` and the result is checked against both
    ``Bleu._corpus_bleu`` and the ``update``/``compute`` path fed with the
    first candidate and its references.

    Args:
        candidate: list of candidate sequences (only the first is passed to
            ``update``).
        references: list of reference sets matching ``candidate``.
    """
    for i in range(1, 8):
        weights = tuple([1 / i] * i)
        # Silence any warnings emitted by the reference implementation.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            reference = corpus_bleu(references, candidate, weights=weights)
        bleu = Bleu(ngram=i)
        assert pytest.approx(reference) == bleu._corpus_bleu(
            references, candidate)
        bleu.update((candidate[0], references[0]))
        assert pytest.approx(reference) == bleu.compute()
Exemplo n.º 9
0
def test_corpus_bleu_nltk_smooth2(candidate, references):
    """Compare ``Bleu(smooth="nltk_smooth2")`` with smoothed ``corpus_bleu``.

    For n-gram orders 1 through 7, the reference score is obtained from
    ``corpus_bleu`` with ``SmoothingFunction().method2`` and checked against
    both ``Bleu._corpus_bleu`` and the ``update``/``compute`` path fed with
    the first candidate and its references.

    Args:
        candidate: list of candidate sequences (only the first is passed to
            ``update``).
        references: list of reference sets matching ``candidate``.
    """
    for i in range(1, 8):
        weights = tuple([1 / i] * i)
        # Silence any warnings emitted by the reference implementation.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            reference = corpus_bleu(
                references,
                candidate,
                weights=weights,
                smoothing_function=SmoothingFunction().method2)
        bleu = Bleu(ngram=i, smooth="nltk_smooth2")
        # Two independent floating-point implementations are compared here,
        # so use a tolerance instead of exact equality.
        assert pytest.approx(reference) == bleu._corpus_bleu(
            references, candidate)
        bleu.update((candidate[0], references[0]))
        assert pytest.approx(reference) == bleu.compute()
Exemplo n.º 10
0
def test_bleu_batch_micro():
    """Check micro-averaged Bleu on one batch of three samples.

    Both the ``update``/``compute`` path and a direct ``_corpus_bleu`` call
    must match the reference ``corpus_bleu`` score for the same batch.
    """
    metric = Bleu(ngram=4, average="micro")

    # Batch size 3
    hypotheses = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    refs = [corpus.references_1, corpus.references_2, corpus.references_2]
    metric.update((hypotheses, refs))

    # Silence any warnings emitted by the reference implementation.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        expected = corpus_bleu(refs, hypotheses)

    accumulated = metric.compute()
    assert pytest.approx(accumulated) == expected

    direct = metric._corpus_bleu(refs, hypotheses)
    assert pytest.approx(direct) == expected
Exemplo n.º 11
0
def test_accumulation_macro_bleu():
    """Check that default-average updates accumulate to the mean sentence score.

    Four single-sample updates are applied, after which ``compute()`` must
    equal the mean of ``_sentence_bleu`` over the same four samples.
    """
    metric = Bleu(ngram=4, smooth="smooth2")

    # (candidate, references) pairs, in the order they are fed to the metric.
    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    for candidate, reference_set in samples:
        metric.update(([candidate], [reference_set]))

    # Score each sample only after all updates, then add the scores left to
    # right starting from the first one.
    first_score, *other_scores = [
        metric._sentence_bleu(reference_set, candidate)
        for candidate, reference_set in samples
    ]
    total = sum(other_scores, first_score)

    assert metric.compute() == total / 4
Exemplo n.º 12
0
def test_bleu():
    """Check that unbatched updates accumulate to the mean per-sample score.

    Each ``update`` receives one candidate and its references directly.
    Afterwards ``compute()`` must equal the mean of ``_corpus_bleu``
    evaluated on each sample wrapped as a one-element corpus.
    """
    metric = Bleu(ngram=4, smooth="smooth2")

    # (candidate, references) pairs, in the order they are fed to the metric.
    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    for candidate, reference_set in samples:
        metric.update((candidate, reference_set))

    # Score each sample only after all updates, then add the scores left to
    # right starting from the first one.
    first_score, *other_scores = [
        metric._corpus_bleu([reference_set], [candidate])
        for candidate, reference_set in samples
    ]
    total = sum(other_scores, first_score)

    assert metric.compute() == total / 4