示例#1
0
 def test_bleu_different(self):
     """Check that unrelated reference and hypothesis text scores below 100.

     Both the uncased and the cased score are computed before any assertion
     runs.
     """
     # presumably _create_temp_file writes the text to a file and returns a
     # handle/path that bleu_wrapper accepts -- helper is not visible here.
     reference = self._create_temp_file("Testing\nmore tests!")
     hypothesis = self._create_temp_file("Dog\nCat")

     # Third bleu_wrapper argument: False -> uncased score, True -> cased score.
     scores = [
         compute_bleu.bleu_wrapper(reference, hypothesis, case_flag)
         for case_flag in (False, True)
     ]

     for score in scores:
         self.assertLess(score, 100)
示例#2
0
 def test_bleu_same_different_case(self):
     """Check scoring of texts that differ only in letter case.

     The uncased score must be exactly 100, while the cased score must be
     strictly below 100.
     """
     # presumably _create_temp_file writes the text to a file and returns a
     # handle/path that bleu_wrapper accepts -- helper is not visible here.
     reference = self._create_temp_file("Test 1 two 3\nmore tests!")
     hypothesis = self._create_temp_file("test 1 two 3\nMore tests!")

     # Compute both scores up front: uncased (False) first, then cased (True).
     score_ignoring_case, score_with_case = [
         compute_bleu.bleu_wrapper(reference, hypothesis, case_flag)
         for case_flag in (False, True)
     ]

     self.assertEqual(100, score_ignoring_case)
     self.assertLess(score_with_case, 100)
def translate_and_compute_bleu(estimator, subtokenizer, bleu_source, bleu_ref):
  """Translate file and report the cased and uncased bleu scores.

  The translation is written to a temporary file which is always removed
  before this function returns, including when translation or scoring raises.

  Args:
    estimator: Passed through unchanged to translate.translate_file.
    subtokenizer: Passed through unchanged to translate.translate_file.
    bleu_source: Source input handed to translate.translate_file.
    bleu_ref: Reference handed to compute_bleu.bleu_wrapper for scoring.

  Returns:
    A tuple (uncased_score, cased_score) as returned by
    compute_bleu.bleu_wrapper with its third argument False and True.
  """
  # Create temporary file to store translation. Only the name is needed, so
  # close the handle right away: this avoids leaking the descriptor and lets
  # the file be reopened by name on platforms that forbid double-opening.
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp_filename = tmp.name

  try:
    translate.translate_file(
        estimator, subtokenizer, bleu_source, output_file=tmp_filename,
        print_all_translations=False)

    # Compute uncased and cased bleu scores.
    uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
    cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
  finally:
    # delete=False means nothing else cleans this file up.
    os.remove(tmp_filename)
  return uncased_score, cased_score
示例#4
0
def translate_and_compute_bleu(estimator, subtokenizer, bleu_source, bleu_ref):
  """Translate file and report the cased and uncased bleu scores.

  The translation is written to a temporary file which is always removed
  before this function returns, including when translation or scoring raises.

  Args:
    estimator: Passed through unchanged to translate.translate_file.
    subtokenizer: Passed through unchanged to translate.translate_file.
    bleu_source: Source input handed to translate.translate_file.
    bleu_ref: Reference handed to compute_bleu.bleu_wrapper for scoring.

  Returns:
    A tuple (uncased_score, cased_score) as returned by
    compute_bleu.bleu_wrapper with its third argument False and True.
  """
  # Create temporary file to store translation. Only the name is needed, so
  # close the handle right away: this avoids leaking the descriptor and lets
  # the file be reopened by name on platforms that forbid double-opening.
  with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp_filename = tmp.name

  try:
    translate.translate_file(
        estimator, subtokenizer, bleu_source, output_file=tmp_filename,
        print_all_translations=False)

    # Compute uncased and cased bleu scores.
    uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
    cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
  finally:
    # delete=False means nothing else cleans this file up.
    os.remove(tmp_filename)
  return uncased_score, cased_score
def translate_and_compute_bleu(model,
                               params,
                               subtokenizer,
                               bleu_source,
                               bleu_ref,
                               distribution_strategy=None):
    """Translate file and report the cased and uncased bleu scores.

    The translation is written to a temporary file which is always removed
    before this function returns, including when translation or scoring
    raises.

    Args:
      model: A Keras model, used to generate the translations.
      params: A dictionary, containing the translation related parameters.
      subtokenizer: A subtokenizer object, used for encoding and decoding source
        and translated lines.
      bleu_source: A file containing source sentences for translation.
      bleu_ref: A file containing the reference for the translated sentences.
      distribution_strategy: A platform distribution strategy, used for TPU based
        translation.

    Returns:
      uncased_score: A float, the case insensitive BLEU score.
      cased_score: A float, the case sensitive BLEU score.
    """
    # Create temporary file to store translation. Only the name is needed, so
    # close the handle right away: this avoids leaking the descriptor and lets
    # the file be reopened by name on platforms that forbid double-opening.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    try:
        translate.translate_file(
            model,
            params,
            subtokenizer,
            bleu_source,
            output_file=tmp_filename,
            print_all_translations=False,
            distribution_strategy=distribution_strategy)

        # Compute uncased and cased bleu scores.
        uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
        cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
    finally:
        # delete=False means nothing else cleans this file up.
        os.remove(tmp_filename)
    return uncased_score, cased_score