def test_bleu_batch_macro():
    """Macro-averaged BLEU over a batch of 3 must equal the mean NLTK sentence_bleu."""
    bleu = Bleu(ngram=4)

    # A batch of three (candidate, references) pairs.
    cands = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    ref_lists = [corpus.references_1, corpus.references_2, corpus.references_2]
    bleu.update((cands, ref_lists))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Same left-to-right accumulation as summing the three scores explicitly.
        reference_bleu_score = sum(sentence_bleu(r, c) for c, r in zip(cands, ref_lists)) / 3

    assert pytest.approx(bleu.compute()) == reference_bleu_score

    # Feed the same pairs one sentence at a time, checking the private helper too.
    running = 0
    for cand, refs in zip(cands, ref_lists):
        running += bleu._sentence_bleu(refs, cand)
        bleu.update(([cand], [refs]))

    assert pytest.approx(running / len(ref_lists)) == reference_bleu_score
    assert pytest.approx(bleu.compute()) == reference_bleu_score
def _test(candidates, references, average, smooth="no_smooth", smooth_nltk_fn=None, ngram_range=8):
    """Compare Bleu against NLTK for uniform n-gram weights, n = 1 .. ngram_range-1.

    For "macro" the per-sentence path (_sentence_bleu) is checked against NLTK's
    sentence_bleu; for "micro" the corpus path (_corpus_bleu) is checked against
    corpus_bleu. In both cases compute() after one update must match as well.
    """
    for n in range(1, ngram_range):
        uniform_weights = tuple([1 / n] * n)
        bleu = Bleu(ngram=n, average=average, smooth=smooth)

        if average == "macro":
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                expected = sentence_bleu(
                    references[0],
                    candidates[0],
                    weights=uniform_weights,
                    smoothing_function=smooth_nltk_fn,
                )
            assert pytest.approx(expected) == bleu._sentence_bleu(references[0], candidates[0])
        elif average == "micro":
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                expected = corpus_bleu(
                    references,
                    candidates,
                    weights=uniform_weights,
                    smoothing_function=smooth_nltk_fn,
                )
            assert pytest.approx(expected) == bleu._corpus_bleu(references, candidates)

        bleu.update((candidates, references))
        assert pytest.approx(expected) == bleu.compute()
def test_accumulation_macro_bleu():
    """Accumulated macro BLEU equals the arithmetic mean of per-sentence scores."""
    bleu = Bleu(ngram=4, smooth="smooth2")

    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    # One update per sentence, each as a 1-element batch.
    for cand, refs in samples:
        bleu.update(([cand], [refs]))

    # Same left-to-right addition order as accumulating the four scores by hand.
    total = sum(bleu._sentence_bleu(refs, cand) for cand, refs in samples)
    assert bleu.compute() == total / 4
def test_bleu():
    """Accumulated BLEU equals the mean of single-pair _corpus_bleu scores."""
    bleu = Bleu(ngram=4, smooth="smooth2")

    # NOTE(review): unlike test_accumulation_*_bleu, the pairs are passed to
    # update() unwrapped (not as 1-element batches) — confirm this matches the
    # metric's update contract.
    samples = [
        (corpus.cand_1, corpus.references_1),
        (corpus.cand_2a, corpus.references_2),
        (corpus.cand_2b, corpus.references_2),
        (corpus.cand_3, corpus.references_2),
    ]
    for cand, refs in samples:
        bleu.update((cand, refs))

    # Same left-to-right addition order as accumulating the four scores by hand.
    total = sum(bleu._corpus_bleu([refs], [cand]) for cand, refs in samples)
    assert bleu.compute() == total / 4
def test_corpus_bleu(candidate, references):
    """Bleu._corpus_bleu and compute() match NLTK corpus_bleu for n = 1..7.

    Uses uniform n-gram weights (1/n each). NLTK warnings (e.g. for short
    hypotheses) are suppressed while computing the reference score.
    """
    # Removed leftover debug print(candidate, references) — tests should not
    # write to stdout.
    for n in range(1, 8):
        weights = tuple([1 / n] * n)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            expected = corpus_bleu(references, candidate, weights=weights)
        bleu = Bleu(ngram=n)
        assert pytest.approx(expected) == bleu._corpus_bleu(references, candidate)
        bleu.update((candidate[0], references[0]))
        assert pytest.approx(expected) == bleu.compute()
def test_accumulation_micro_bleu():
    """Accumulated micro BLEU equals one corpus-level score over all samples."""
    bleu = Bleu(ngram=4, smooth="smooth2", average="micro")

    cands = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b, corpus.cand_3]
    ref_lists = [
        corpus.references_1,
        corpus.references_2,
        corpus.references_2,
        corpus.references_2,
    ]
    # One update per sentence, each as a 1-element batch.
    for cand, refs in zip(cands, ref_lists):
        bleu.update(([cand], [refs]))

    value = bleu._corpus_bleu(ref_lists, cands)
    assert bleu.compute() == value
def test_corpus_bleu_nltk_smooth2(candidate, references):
    """Bleu(smooth="nltk_smooth2") reproduces NLTK corpus_bleu with method2 smoothing."""
    for n in range(1, 8):
        uniform_weights = tuple([1 / n] * n)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            expected = corpus_bleu(
                references,
                candidate,
                weights=uniform_weights,
                smoothing_function=SmoothingFunction().method2,
            )
        bleu = Bleu(ngram=n, smooth="nltk_smooth2")
        # Exact equality is intentional here: both sides run the same smoothing.
        assert expected == bleu._corpus_bleu(references, candidate)
        bleu.update((candidate[0], references[0]))
        assert expected == bleu.compute()
def test_bleu_batch_micro():
    """Micro-averaged BLEU over a batch of 3 equals a single NLTK corpus_bleu call."""
    bleu = Bleu(ngram=4, average="micro")

    # A batch of three candidate sentences with their reference sets.
    cands = [corpus.cand_1, corpus.cand_2a, corpus.cand_2b]
    ref_lists = [corpus.references_1, corpus.references_2, corpus.references_2]
    bleu.update((cands, ref_lists))

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        reference_bleu_score = corpus_bleu(ref_lists, cands)

    assert pytest.approx(bleu.compute()) == reference_bleu_score
    assert pytest.approx(bleu._corpus_bleu(ref_lists, cands)) == reference_bleu_score