def _bleu(ref_file, trans_file, subword_option=None, lower_case=False):
    """Return the corpus BLEU score, scaled by 100, for a translation file.

    Args:
        ref_file: Path of the reference file, read as UTF-8 line by line.
        trans_file: Path of the translation file, read as UTF-8 line by line.
        subword_option: Forwarded to ``_clean`` for reference lines only;
            translation lines are always cleaned with ``subword_option=None``.
        lower_case: Forwarded to ``_clean`` for both references and
            translations.

    Returns:
        ``100 *`` the first element of the tuple returned by
        ``bleu.compute_bleu`` (called with max order 4 and no smoothing).
    """

    def _read_lines(filename):
        # Decode the raw bytes as UTF-8 and return every line of the file.
        with codecs.getreader("utf-8")(tf.gfile.GFile(filename,
                                                      "rb")) as reader:
            return reader.readlines()

    # A single reference file is supported; it is kept in a list so that
    # zip(*...) below yields one tuple of references per segment.
    all_reference_lines = [_read_lines(name) for name in [ref_file]]

    per_segment_references = [
        [
            _clean(ref_line, subword_option, lower_case=lower_case).split()
            for ref_line in aligned_refs
        ]
        for aligned_refs in zip(*all_reference_lines)
    ]

    with codecs.getreader("utf-8")(tf.gfile.GFile(trans_file,
                                                  "rb")) as reader:
        translations = [
            _clean(hyp_line, subword_option=None,
                   lower_case=lower_case).split()
            for hyp_line in reader
        ]

    # Result layout:
    # bleu_score, precisions, bp, ratio, translation_length, reference_length
    bleu_score, _, _, _, _, _ = bleu.compute_bleu(per_segment_references,
                                                  translations, 4, False)
    return 100 * bleu_score
Exemplo n.º 2
0
def _bleu(ref_file, trans_file, subword_option=None):
    """Return the corpus BLEU score, scaled by 100, for a translation file.

    Args:
        ref_file: Path of the reference file, read as UTF-8 line by line.
        trans_file: Path of the translation file, read as UTF-8 line by line.
        subword_option: Forwarded to ``_clean`` for reference lines only;
            translation lines are always cleaned with ``subword_option=None``.

    Returns:
        ``100 *`` the first element of the tuple returned by
        ``bleu.compute_bleu`` (called with max order 4 and no smoothing).
    """
    utf8_reader = codecs.getreader("utf-8")

    # Only one reference file is used, but the per-file list shape is kept so
    # that zip(*...) produces one tuple of references per segment.
    reference_text = []
    for path in (ref_file,):
        with utf8_reader(tf.gfile.GFile(path, "rb")) as stream:
            reference_text.append(stream.readlines())

    # Tokens are obtained by splitting on a single space character.
    per_segment_references = [
        [_clean(ref, subword_option).split(" ") for ref in aligned]
        for aligned in zip(*reference_text)
    ]

    with utf8_reader(tf.gfile.GFile(trans_file, "rb")) as stream:
        translations = [
            _clean(hyp, subword_option=None).split(" ") for hyp in stream
        ]

    bleu_score, _, _, _, _, _ = bleu.compute_bleu(
        per_segment_references, translations, 4, False)
    return 100 * bleu_score
Exemplo n.º 3
0
def compute_bleu(sample_sentences, tgt_sentences, hparams):
    """Compute the BLEU score of sampled sentences against target sentences.

    Args:
        sample_sentences: Predicted ids, laid out as [batch_size, time], or
            [time, batch_size] when ``hparams.time_major`` is set (it is then
            transposed via ``.T``).
        tgt_sentences: Label ids, laid out as [batch_size, time].
        hparams: Object providing ``time_major``, ``batch_size``, ``eos`` and
            ``bpe_delimiter``.

    Returns:
        The first element of the tuple returned by ``bleu.compute_bleu``
        (max order 4, no smoothing). The value is not rescaled.
    """
    if hparams.time_major:
        # Bring the samples to [batch_size, time] like the targets.
        sample_sentences = sample_sentences.T
    translation_corpus, reference_corpus = [], []
    # `range` is used instead of the Python-2-only `xrange` so the function
    # runs on both Python 2 and Python 3.
    for i in range(hparams.batch_size):
        sample_sentence = get_translation(sample_sentences, i, hparams.eos,
                                          hparams.bpe_delimiter)
        tgt_sentence = get_translation(tgt_sentences, i, hparams.eos,
                                       hparams.bpe_delimiter)
        translation_corpus.append(sample_sentence)
        # Each segment has exactly one reference, wrapped in its own list.
        reference_corpus.append([tgt_sentence])
    bleu_score, _, _, _, _, _ = bleu.compute_bleu(reference_corpus,
                                                  translation_corpus,
                                                  max_order=4,
                                                  smooth=False)
    return bleu_score
Exemplo n.º 4
0
def _bleu(ref_file, trans_file, bpe_delimiter=None):
    """Compute BLEU scores for n-gram orders 1 through 4 and handle BPE.

    The reference and translation files are read and cleaned once; the score
    is then computed for each maximum n-gram order from 1 to 4. Every score
    (scaled by 100) is printed, and the score for order 4 is returned.

    Args:
        ref_file: Path of the reference file, read as UTF-8 line by line.
        trans_file: Path of the translation file, read as UTF-8 line by line.
        bpe_delimiter: Forwarded to ``_clean`` for both reference and
            translation lines.

    Returns:
        ``100 *`` the first element of the tuple returned by
        ``bleu.compute_bleu`` for ``max_order=4`` (no smoothing).
    """
    smooth = False
    utf8_reader = codecs.getreader("utf-8")

    ref_files = [ref_file]
    reference_text = []
    for reference_filename in ref_files:
        with utf8_reader(tf.io.gfile.GFile(reference_filename, "rb")) as fh:
            reference_text.append(fh.readlines())

    # The corpus does not depend on the n-gram order, so it is built once
    # here instead of being re-read from disk for every order.
    per_segment_references = [
        [_clean(reference, bpe_delimiter).split(" ")
         for reference in references]
        for references in zip(*reference_text)
    ]

    with utf8_reader(tf.io.gfile.GFile(trans_file, "rb")) as fh:
        translations = [_clean(line, bpe_delimiter).split(" ") for line in fh]

    for max_order in range(1, 5):
        # bleu_score, precisions, bp, ratio, translation_length,
        # reference_length
        bleu_score, _, _, _, _, _ = bleu.compute_bleu(per_segment_references,
                                                      translations, max_order,
                                                      smooth)
        print(100 * bleu_score)

    # The loop always runs, so this is the score for max_order == 4.
    return 100 * bleu_score