示例#1
0
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
  """Compute BLEU for two files (reference and hypothesis translation).

  Each file is read in full, passed through `tokenizer.native_to_unicode`,
  stripped of leading/trailing whitespace and split into lines. The two
  resulting line lists are then handed to `bleu_on_list`.

  Args:
    ref_filename: Path of the reference translation file.
    hyp_filename: Path of the hypothesis translation file.
    case_sensitive: Forwarded unchanged to `bleu_on_list` as its third
      positional argument.

  Returns:
    Whatever `bleu_on_list` returns for the two line lists.
  """
  # Read inside `with` blocks so the file handles are closed deterministically
  # instead of being left for the garbage collector.
  with tf.io.gfile.GFile(ref_filename) as ref_file:
    ref_text = ref_file.read()
  with tf.io.gfile.GFile(hyp_filename) as hyp_file:
    hyp_text = hyp_file.read()

  ref_lines = tokenizer.native_to_unicode(ref_text).strip().splitlines()
  hyp_lines = tokenizer.native_to_unicode(hyp_text).strip().splitlines()
  return bleu_on_list(ref_lines, hyp_lines, case_sensitive)
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files (reference and hypothesis translation).

    Each file is read in full, passed through `tokenizer.native_to_unicode`,
    stripped of leading/trailing whitespace and split into lines. Lines are
    optionally lowercased, tokenized with `bleu_tokenize`, and scored with
    `metrics.compute_bleu`.

    Args:
        ref_filename: Path of the reference translation file.
        hyp_filename: Path of the hypothesis translation file.
        case_sensitive: If False (the default), both sides are lowercased
            before tokenization.

    Returns:
        The value of `metrics.compute_bleu` for the tokenized lines,
        multiplied by 100.

    Raises:
        ValueError: If the two files do not contain the same number of lines
            after stripping.
    """
    # Read inside `with` blocks so the file handles are closed
    # deterministically instead of being left for the garbage collector.
    with tf.io.gfile.GFile(ref_filename) as ref_file:
        ref_text = ref_file.read()
    with tf.io.gfile.GFile(hyp_filename) as hyp_file:
        hyp_text = hyp_file.read()

    ref_lines = tokenizer.native_to_unicode(ref_text).strip().splitlines()
    hyp_lines = tokenizer.native_to_unicode(hyp_text).strip().splitlines()

    # Lines are scored pairwise, so a count mismatch makes the result
    # meaningless; fail loudly instead.
    if len(ref_lines) != len(hyp_lines):
        raise ValueError(
            "Reference and translation files have different number of "
            "lines. If training only a few steps (100-200), the "
            "translation may be empty.")
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100