def test_bleu_different(self):
    """Unrelated reference and hypothesis text must score below 100.

    The check is made for both values of the third ``bleu_wrapper`` argument
    (False first, then True).
    """
    reference_path = self._create_temp_file("Testing\nmore tests!")
    hypothesis_path = self._create_temp_file("Dog\nCat")

    # Compute both scores up front, uncased (False) before cased (True).
    scores = [
        compute_bleu.bleu_wrapper(reference_path, hypothesis_path, flag)
        for flag in (False, True)
    ]

    for score in scores:
        self.assertLess(score, 100)
def test_bleu_same_different_case(self):
    """Text differing only in letter case scores 100 uncased, less when cased."""
    reference_text = "Test 1 two 3\nmore tests!"
    hypothesis_text = "test 1 two 3\nMore tests!"
    reference_path = self._create_temp_file(reference_text)
    hypothesis_path = self._create_temp_file(hypothesis_text)

    # Compute both scores before asserting, uncased (False) before cased (True).
    score_uncased, score_cased = (
        compute_bleu.bleu_wrapper(reference_path, hypothesis_path, False),
        compute_bleu.bleu_wrapper(reference_path, hypothesis_path, True),
    )

    self.assertEqual(100, score_uncased)
    self.assertLess(score_cased, 100)
def translate_and_compute_bleu(estimator, subtokenizer, bleu_source, bleu_ref):
    """Translate file and report the cased and uncased bleu scores.

    Args:
      estimator: Forwarded unchanged to ``translate.translate_file``.
      subtokenizer: Forwarded unchanged to ``translate.translate_file``.
      bleu_source: Source file handed to ``translate.translate_file``.
      bleu_ref: Reference file handed to ``compute_bleu.bleu_wrapper``.

    Returns:
      A ``(uncased_score, cased_score)`` tuple, as returned by
      ``compute_bleu.bleu_wrapper`` with its third argument set to False and
      True respectively.
    """
    # Only the path is needed, so close the handle immediately. Leaving it
    # open leaks a descriptor and prevents reopening the file by name on
    # Windows.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    try:
        translate.translate_file(
            estimator,
            subtokenizer,
            bleu_source,
            output_file=tmp_filename,
            print_all_translations=False)

        # Compute uncased and cased bleu scores.
        uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
        cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
    finally:
        # Remove the temporary translation even when a step above raises.
        os.remove(tmp_filename)
    return uncased_score, cased_score
def translate_and_compute_bleu(model, params, subtokenizer, bleu_source,
                               bleu_ref, distribution_strategy=None):
    """Translate file and report the cased and uncased bleu scores.

    Args:
      model: A Keras model, used to generate the translations.
      params: A dictionary, containing the translation related parameters.
      subtokenizer: A subtokenizer object, used for encoding and decoding source
        and translated lines.
      bleu_source: A file containing source sentences for translation.
      bleu_ref: A file containing the reference for the translated sentences.
      distribution_strategy: A platform distribution strategy, used for TPU based
        translation.

    Returns:
      uncased_score: A float, the case insensitive BLEU score.
      cased_score: A float, the case sensitive BLEU score.
    """
    # Only the path is needed, so close the handle immediately. Leaving it
    # open leaks a descriptor and prevents reopening the file by name on
    # Windows.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp_filename = tmp.name

    try:
        translate.translate_file(
            model,
            params,
            subtokenizer,
            bleu_source,
            output_file=tmp_filename,
            print_all_translations=False,
            distribution_strategy=distribution_strategy)

        # Compute uncased and cased bleu scores.
        uncased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, False)
        cased_score = compute_bleu.bleu_wrapper(bleu_ref, tmp_filename, True)
    finally:
        # Remove the temporary translation even when a step above raises.
        os.remove(tmp_filename)
    return uncased_score, cased_score
def evaluate_bleu_score(self, case_sensitive=False):
    """Write the stored sentence pairs to temp files and score them with BLEU.

    Pairs are taken from ``self.ref_sents`` and ``self.hyp_sents`` with
    ``zip``, so surplus entries in the longer sequence are ignored. Each
    sentence is written on its own line.

    Args:
      case_sensitive: Forwarded to ``compute_bleu.bleu_wrapper``. Previously
        this argument was accepted but ignored (False was always used).

    Returns:
      The value returned by ``compute_bleu.bleu_wrapper``, or 0 if that call
      raises an exception (a warning is logged in that case).
    """
    import os  # Local import: the enclosing module's imports are not visible here.

    # Only the paths are needed; close the handles right away so they are not
    # leaked and the files can be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False) as ref_tmp:
        ref_path = ref_tmp.name
    with tempfile.NamedTemporaryFile(delete=False) as hyp_tmp:
        hyp_path = hyp_tmp.name

    try:
        with tf.io.gfile.GFile(ref_path, 'w') as f_ref:
            with tf.io.gfile.GFile(hyp_path, 'w') as f_hyp:
                for reference, hypothesis in zip(self.ref_sents, self.hyp_sents):
                    f_hyp.write(hypothesis + '\n')
                    f_ref.write(reference + '\n')

        try:
            bleu_score = compute_bleu.bleu_wrapper(
                ref_filename=ref_path,
                hyp_filename=hyp_path,
                case_sensitive=case_sensitive)
        except Exception:
            # Deliberate best-effort: a scoring failure yields 0 rather than
            # aborting the caller. `Exception` (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            log.warning('Some problem while calculating BLEU score so setting it to zero')
            bleu_score = 0
    finally:
        # Best-effort cleanup; a failed delete must not mask the result or
        # an in-flight exception.
        for path in (ref_path, hyp_path):
            try:
                os.remove(path)
            except OSError:
                pass
    return bleu_score