示例#1
0
def test_bleu_batch_macro():
    """Check ``Bleu(ngram=4)`` on a batch of three against per-sentence scores.

    The metric is first updated with one batch of three hypothesis /
    reference-set pairs and its ``compute()`` result is compared with the
    mean of three ``sentence_bleu`` calls. The same pairs are then fed
    again one at a time (the metric is not reset in between), and both the
    mean of the metric's own ``_sentence_bleu`` values and the new
    ``compute()`` result must still match that mean.
    """
    metric = Bleu(ngram=4)

    # A single batch holding three hypothesis / reference-set pairs.
    hypotheses = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    refs = [corpus.references_1, corpus.references_2, corpus.references_2]
    metric.update((hypotheses, refs))

    # Expected value: plain mean of the reference sentence-level scores.
    # Warnings raised by the reference implementation are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        per_sentence = [
            sentence_bleu(ref_set, hyp)
            for ref_set, hyp in zip(refs, hypotheses)
        ]
    expected = sum(per_sentence) / 3
    assert pytest.approx(metric.compute()) == expected

    # Second pass: one pair per update, tallying the metric's own
    # sentence-level score for each pair along the way.
    running_total = 0
    for hyp, ref_set in zip(hypotheses, refs):
        running_total += metric._sentence_bleu(ref_set, hyp)
        metric.update(([hyp], [ref_set]))

    mean_sentence_score = running_total / len(refs)
    accumulated_score = metric.compute()

    assert pytest.approx(mean_sentence_score) == expected
    assert pytest.approx(accumulated_score) == expected
示例#2
0
def _test(candidates,
          references,
          average,
          smooth="no_smooth",
          smooth_nltk_fn=None,
          ngram_range=8):
    """Compare ``Bleu`` with the reference BLEU functions for several n-gram orders.

    For every order ``n`` in ``range(1, ngram_range)`` a fresh ``Bleu`` is
    built with uniform weights ``1 / n`` used for the reference call.

    Args:
        candidates: candidate translations passed to ``Bleu.update``.
        references: reference sets, parallel to ``candidates``.
        average: ``"macro"`` compares the first candidate / reference pair
            via ``sentence_bleu``; ``"micro"`` compares the whole input via
            ``corpus_bleu``.
        smooth: value forwarded to ``Bleu(smooth=...)``.
        smooth_nltk_fn: value forwarded as ``smoothing_function`` to the
            reference call.
        ngram_range: exclusive upper bound of the n-gram orders tried.

    NOTE(review): in the "macro" branch the expected value comes from the
    first pair only, yet ``compute()`` is checked after updating with all
    candidates -- presumably callers pass a single pair; confirm.
    Any other ``average`` value leaves the expected score unbound, so the
    final assertion would raise ``NameError``.
    """
    for order in range(1, ngram_range):
        uniform_weights = (1 / order,) * order
        metric = Bleu(ngram=order, average=average, smooth=smooth)

        if average == "macro":
            first_refs = references[0]
            first_cand = candidates[0]
            # Silence warnings emitted by the reference implementation.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                expected = sentence_bleu(
                    first_refs,
                    first_cand,
                    weights=uniform_weights,
                    smoothing_function=smooth_nltk_fn,
                )
            own_score = metric._sentence_bleu(first_refs, first_cand)
            assert pytest.approx(expected) == own_score

        elif average == "micro":
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                expected = corpus_bleu(
                    references,
                    candidates,
                    weights=uniform_weights,
                    smoothing_function=smooth_nltk_fn,
                )
            own_score = metric._corpus_bleu(references, candidates)
            assert pytest.approx(expected) == own_score

        # The public update/compute path must agree with the same value.
        metric.update((candidates, references))
        assert pytest.approx(expected) == metric.compute()
示例#3
0
def test_accumulation_macro_bleu():
    """Check that four single-pair updates average the per-sentence scores.

    ``Bleu(ngram=4, smooth="smooth2")`` is updated once per pair, each pair
    wrapped as a batch of one. ``compute()`` must equal (exactly) the sum of
    the metric's own ``_sentence_bleu`` values for those pairs divided by 4.
    """
    metric = Bleu(ngram=4, smooth="smooth2")

    # (candidate, reference set) pairs, in the order they are accumulated.
    pairs = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]

    for cand, ref_set in pairs:
        metric.update(([cand], [ref_set]))

    # Summed left to right in the same order so the exact comparison holds.
    total = sum(metric._sentence_bleu(ref_set, cand) for cand, ref_set in pairs)
    assert metric.compute() == total / 4
示例#4
0
def test_bleu():
    """Check that four updates average the single-pair corpus scores.

    Each ``update`` receives one candidate with its reference set directly
    (no batch wrapping). ``compute()`` must equal (exactly) the sum of
    ``_corpus_bleu`` evaluated on each pair as a one-element corpus,
    divided by 4.
    """
    metric = Bleu(ngram=4, smooth="smooth2")

    # (candidate, reference set) pairs, in the order they are accumulated.
    pairs = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]

    for cand, ref_set in pairs:
        metric.update((cand, ref_set))

    # Summed left to right in the same order so the exact comparison holds.
    total = sum(metric._corpus_bleu([ref_set], [cand]) for cand, ref_set in pairs)
    assert metric.compute() == total / 4
示例#5
0
def test_corpus_bleu(candidate, references):
    """Compare ``Bleu`` with ``corpus_bleu`` for n-gram orders 1 through 7.

    For each order the reference score is computed with uniform weights,
    then checked against ``Bleu._corpus_bleu`` on the full input and against
    ``compute()`` after a single update with the first candidate and the
    first reference set.

    Args:
        candidate: sequence of candidates; only ``candidate[0]`` is passed
            to ``update``.
        references: sequence of reference sets parallel to ``candidate``.
    """
    print(candidate, references)
    for order in range(1, 8):
        uniform_weights = (1 / order,) * order

        # Silence warnings emitted by the reference implementation.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            expected = corpus_bleu(references, candidate, weights=uniform_weights)

        metric = Bleu(ngram=order)
        direct_score = metric._corpus_bleu(references, candidate)
        assert pytest.approx(expected) == direct_score

        metric.update((candidate[0], references[0]))
        assert pytest.approx(expected) == metric.compute()
示例#6
0
def test_accumulation_micro_bleu():
    """Check that micro-averaged accumulation matches one corpus-level call.

    ``Bleu(ngram=4, smooth="smooth2", average="micro")`` is updated with
    four pairs, one per call and each wrapped as a batch of one.
    ``compute()`` must equal (exactly) ``_corpus_bleu`` evaluated once on
    all four reference sets and candidates together.
    """
    metric = Bleu(ngram=4, smooth="smooth2", average="micro")

    # (candidate, reference set) pairs, in the order they are accumulated.
    pairs = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]

    for cand, ref_set in pairs:
        metric.update(([cand], [ref_set]))

    all_references = [ref_set for _, ref_set in pairs]
    all_candidates = [cand for cand, _ in pairs]
    pooled_score = metric._corpus_bleu(all_references, all_candidates)

    assert metric.compute() == pooled_score
示例#7
0
def test_corpus_bleu_nltk_smooth2(candidate, references):
    """Compare ``Bleu(smooth="nltk_smooth2")`` with ``corpus_bleu`` + ``method2``.

    For n-gram orders 1 through 7 the reference score is computed with
    uniform weights and ``SmoothingFunction().method2``. It must equal
    (exactly) both ``Bleu._corpus_bleu`` on the full input and ``compute()``
    after a single update with the first candidate and first reference set.

    Args:
        candidate: sequence of candidates; only ``candidate[0]`` is passed
            to ``update``.
        references: sequence of reference sets parallel to ``candidate``.
    """
    for order in range(1, 8):
        uniform_weights = (1 / order,) * order

        # Silence warnings emitted by the reference implementation.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            smoother = SmoothingFunction().method2
            expected = corpus_bleu(
                references,
                candidate,
                weights=uniform_weights,
                smoothing_function=smoother,
            )

        metric = Bleu(ngram=order, smooth="nltk_smooth2")
        direct_score = metric._corpus_bleu(references, candidate)
        assert expected == direct_score

        metric.update((candidate[0], references[0]))
        assert expected == metric.compute()
示例#8
0
def test_bleu_batch_micro():
    """Check micro-averaged ``Bleu(ngram=4)`` on a batch of three.

    After one update with three hypothesis / reference-set pairs, both
    ``compute()`` and a direct ``_corpus_bleu`` call on the same data must
    approximately equal ``corpus_bleu`` called with its default arguments.
    """
    metric = Bleu(ngram=4, average="micro")

    # A single batch holding three hypothesis / reference-set pairs.
    hypotheses = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    refs = [corpus.references_1, corpus.references_2, corpus.references_2]
    metric.update((hypotheses, refs))

    # Silence warnings emitted by the reference implementation.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        expected = corpus_bleu(refs, hypotheses)

    accumulated_score = metric.compute()
    assert pytest.approx(accumulated_score) == expected

    direct_score = metric._corpus_bleu(refs, hypotheses)
    assert pytest.approx(direct_score) == expected