Example No. 1
def bleu_on_list(ref_lines, hyp_lines, case_sensitive=False):
  """Compute BLEU for two list of strings (reference and hypothesis)."""
  if len(ref_lines) != len(hyp_lines):
    raise ValueError(
        "Reference and translation files have different number of "
        "lines (%d VS %d). If training only a few steps (100-200), the "
        "translation may be empty." % (len(ref_lines), len(hyp_lines)))
  if not case_sensitive:
    ref_lines = [x.lower() for x in ref_lines]
    hyp_lines = [x.lower() for x in hyp_lines]
  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
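Both bleu_tokenize and metrics.compute_bleu are helpers defined elsewhere in the module and are not shown on this page. As a rough idea of what the tokenization step does (separating punctuation from words before n-gram counting), here is a simplified, self-contained sketch; it is an assumption, not the module's actual bleu_tokenize:

import re

def simple_bleu_tokenize(text):
  """Simplified stand-in for bleu_tokenize: isolate punctuation, then split.

  The real helper applies mteval-style rules; this sketch only separates a
  few common punctuation characters from the surrounding words.
  """
  text = re.sub(r'([.,!?;:"()])', r' \1 ', text)
  return text.split()

# "hello, world!" -> ['hello', ',', 'world', '!']
print(simple_bleu_tokenize("hello, world!"))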
Example No. 2
def bleu_on_list(ref_lines, hyp_lines, case_sensitive=False):
    """Compute BLEU for two list of strings (reference and hypothesis)."""
    if len(hyp_lines) == 0:
        # An empty hypothesis scores zero.
        return 0.

    # If the hypothesis is shorter than the reference, pad it with a
    # placeholder token so the two lists stay aligned line-for-line.
    len_diff = len(ref_lines) - len(hyp_lines)
    for i in range(len_diff):
        hyp_lines.append('#')

    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
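Unlike Example No. 1, this variant pads a short hypothesis list with a placeholder token instead of raising an error. A tiny, self-contained illustration of that design choice (toy strings, no BLEU helpers required):

ref_lines = ["the cat sat on the mat", "a quick brown fox", "hello world"]
hyp_lines = ["the cat sat on the mat", "a quick brown fox"]  # one line short

# Pad the hypothesis so both lists stay aligned line-for-line; the '#'
# placeholder simply scores (close to) zero against its reference.
len_diff = len(ref_lines) - len(hyp_lines)
hyp_lines = hyp_lines + ['#'] * max(len_diff, 0)
assert len(ref_lines) == len(hyp_lines)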
Example No. 3
def bleu_score(logits, labels):
  """Approximate BLEU score computation between labels and predictions.

  An approximate BLEU scoring method since we do not glue word pieces or
  decode the ids and tokenize the output. By default, we use ngram order of 4
  and use brevity penalty. Also, this does not have beam search.

  Args:
    logits: Tensor of size [batch_size, length_logits, vocab_size]
    labels: Tensor of size [batch-size, length_labels]

  Returns:
    bleu: int, approx bleu score
  """
  predictions = np.argmax(logits, axis=-1)
  bleu = metrics.compute_bleu(labels, predictions)
  return bleu
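bleu_score selects the most likely token at every position with a plain argmax over the vocabulary axis; there is no beam search. A small NumPy illustration of that step, with toy shapes chosen only for the example:

import numpy as np

# Toy batch: 2 sequences of length 3 over a vocabulary of 5 tokens.
logits = np.random.randn(2, 3, 5)
labels = np.random.randint(0, 5, size=(2, 3))

# Greedy decoding: highest-scoring vocabulary entry at each position.
predictions = np.argmax(logits, axis=-1)  # shape [2, 3], integer token ids
print(predictions.shape, labels.shape)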
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
    """Compute BLEU for two files (reference and hypothesis translation)."""
    ref_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(ref_filename).read()).strip().splitlines()
    hyp_lines = tokenizer.native_to_unicode(
        tf.io.gfile.GFile(hyp_filename).read()).strip().splitlines()

    if len(ref_lines) != len(hyp_lines):
        raise ValueError(
            "Reference and translation files have different number of "
            "lines. If training only a few steps (100-200), the "
            "translation may be empty.")
    if not case_sensitive:
        ref_lines = [x.lower() for x in ref_lines]
        hyp_lines = [x.lower() for x in hyp_lines]
    ref_tokens = [bleu_tokenize(x) for x in ref_lines]
    hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
    return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
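All of the examples above delegate the actual scoring to metrics.compute_bleu, which is not shown on this page. As a rough, self-contained sketch of what corpus-level BLEU computes (clipped n-gram precisions up to order 4, combined by a geometric mean and scaled by a brevity penalty), assuming already-tokenized inputs; this is an illustration, not the library implementation:

import collections
import math

def sketch_compute_bleu(reference_corpus, translation_corpus, max_order=4):
    """Minimal corpus BLEU over lists of token lists (illustrative only)."""
    matches = [0] * max_order
    possible = [0] * max_order
    ref_length = 0
    trans_length = 0
    for ref, trans in zip(reference_corpus, translation_corpus):
        ref_length += len(ref)
        trans_length += len(trans)
        for n in range(1, max_order + 1):
            ref_ngrams = collections.Counter(
                tuple(ref[i:i + n]) for i in range(len(ref) - n + 1))
            trans_ngrams = collections.Counter(
                tuple(trans[i:i + n]) for i in range(len(trans) - n + 1))
            overlap = trans_ngrams & ref_ngrams  # clipped n-gram matches
            matches[n - 1] += sum(overlap.values())
            possible[n - 1] += max(len(trans) - n + 1, 0)
    precisions = [m / p if p > 0 else 0.0 for m, p in zip(matches, possible)]
    if min(precisions) > 0:
        geo_mean = math.exp(sum(math.log(p) for p in precisions) / max_order)
    else:
        geo_mean = 0.0
    ratio = trans_length / ref_length if ref_length else 0.0
    bp = 1.0 if ratio > 1.0 else (math.exp(1 - 1 / ratio) if ratio > 0 else 0.0)
    return geo_mean * bp  # in [0, 1]; the wrappers above multiply by 100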