def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
  """Compute BLEU for two files (reference and hypothesis translation).

  NOTE(review): this definition is shadowed by a second `bleu_wrapper`
  defined later in the file (the TF2 `tf.io.gfile` variant); confirm which
  one is intended to survive and delete the other.

  Args:
    ref_filename: Path to file containing reference translations, one
      sentence per line.
    hyp_filename: Path to file containing hypothesis translations, one
      sentence per line.
    case_sensitive: If False, both sides are lowercased before scoring.

  Returns:
    BLEU score scaled to the range [0, 100].

  Raises:
    ValueError: If the two files do not have the same number of lines.
  """
  # Use context managers so the file handles are closed promptly instead of
  # leaking until garbage collection.
  with tf.gfile.Open(ref_filename) as ref_file:
    ref_lines = ref_file.read().strip().splitlines()
  with tf.gfile.Open(hyp_filename) as hyp_file:
    hyp_lines = hyp_file.read().strip().splitlines()

  if len(ref_lines) != len(hyp_lines):
    raise ValueError("Reference and translation files have different number of "
                     "lines.")
  if not case_sensitive:
    ref_lines = [x.lower() for x in ref_lines]
    hyp_lines = [x.lower() for x in hyp_lines]
  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
  # compute_bleu returns a fraction in [0, 1]; scale to the conventional
  # percentage form.
  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
  """Compute BLEU for two files (reference and hypothesis translation).

  Args:
    ref_filename: Path to file containing reference translations, one
      sentence per line.
    hyp_filename: Path to file containing hypothesis translations, one
      sentence per line.
    case_sensitive: If False, both sides are lowercased before scoring.

  Returns:
    BLEU score scaled to the range [0, 100].

  Raises:
    ValueError: If the two files do not have the same number of lines.
  """
  # Use context managers so the file handles are closed promptly instead of
  # leaking until garbage collection. native_to_unicode normalizes the raw
  # bytes/str to unicode before splitting.
  with tf.io.gfile.GFile(ref_filename) as ref_file:
    ref_lines = tokenizer.native_to_unicode(
        ref_file.read()).strip().splitlines()
  with tf.io.gfile.GFile(hyp_filename) as hyp_file:
    hyp_lines = tokenizer.native_to_unicode(
        hyp_file.read()).strip().splitlines()

  if len(ref_lines) != len(hyp_lines):
    raise ValueError("Reference and translation files have different number of "
                     "lines. If training only a few steps (100-200), the "
                     "translation may be empty.")
  if not case_sensitive:
    ref_lines = [x.lower() for x in ref_lines]
    hyp_lines = [x.lower() for x in hyp_lines]
  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
  # compute_bleu returns a fraction in [0, 1]; scale to the conventional
  # percentage form.
  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100