def evaluate_and_log_bleu(model,
                          params,
                          bleu_source,
                          bleu_ref,
                          vocab_file,
                          distribution_strategy=None):
    """Compute the BLEU scores for a model's translations and log them.

    A `tokenizer.Subtokenizer` is built from `vocab_file` and handed, together
    with the remaining arguments, to `translate_and_compute_bleu`. Both
    resulting scores are written to the log at INFO level and returned.

    Args:
      model: A Keras model, used to generate the translations.
      params: A dictionary, containing the translation related parameters.
      bleu_source: A file containing source sentences for translation.
      bleu_ref: A file containing the reference for the translated sentences.
      vocab_file: A file containing the vocabulary for translation.
      distribution_strategy: A platform distribution strategy, used for TPU
        based translation.

    Returns:
      A `(uncased_score, cased_score)` tuple: the case insensitive BLEU score
      followed by the case sensitive BLEU score.
    """
    vocab = tokenizer.Subtokenizer(vocab_file)
    uncased, cased = translate_and_compute_bleu(
        model, params, vocab, bleu_source, bleu_ref, distribution_strategy)

    # Log both scores through the same call shape; uncased first, then cased.
    for template, score in (
        ("Bleu score (uncased): %s", uncased),
        ("Bleu score (cased): %s", cased),
    ):
        logging.info(template, score)

    return uncased, cased
def _init_subtokenizer(self, vocab_list):
    """Build a `tokenizer.Subtokenizer` from an in-memory list of subtokens.

    The subtokens are written to a freshly created temporary file, one per
    line and each wrapped in single quotes, and that file is then passed to
    `tokenizer.Subtokenizer` as its vocabulary file.

    Args:
      vocab_list: Iterable of subtoken strings to write to the vocab file.

    Returns:
      The `tokenizer.Subtokenizer` created from the temporary vocab file with
      `reserved_tokens=[]`.
    """
    # Only the path of the temporary file is needed. Close the handle right
    # away so no file descriptor is leaked and the file is not held open
    # while it is re-opened by name below. `delete=False` keeps the file on
    # disk after the handle is closed; it is not removed here.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        vocab_path = temp_file.name

    with tf.io.gfile.GFile(vocab_path, "w") as w:
        for subtoken in vocab_list:
            w.write("'%s'\n" % subtoken)

    return tokenizer.Subtokenizer(vocab_path, reserved_tokens=[])
def evaluate_and_log_bleu(estimator, bleu_source, bleu_ref, vocab_file):
    """Compute the BLEU scores for an estimator's translations and log them.

    Builds a `tokenizer.Subtokenizer` from `vocab_file`, delegates the
    translation and scoring to `translate_and_compute_bleu`, and logs both
    scores at INFO level through `tf.logging`.

    Args:
      estimator: Passed through to `translate_and_compute_bleu`.
      bleu_source: Passed through to `translate_and_compute_bleu`.
      bleu_ref: Passed through to `translate_and_compute_bleu`.
      vocab_file: Vocabulary file used to construct the subtokenizer.

    Returns:
      A `(uncased_score, cased_score)` tuple.
    """
    vocab = tokenizer.Subtokenizer(vocab_file)
    bleu_uncased, bleu_cased = translate_and_compute_bleu(
        estimator, vocab, bleu_source, bleu_ref)

    tf.logging.info("Bleu score (uncased): %f", bleu_uncased)
    tf.logging.info("Bleu score (cased): %f", bleu_cased)

    scores = (bleu_uncased, bleu_cased)
    return scores
def predict(self):
    """Run prediction on a slice of the eval data and translate the outputs.

    Steps, in order:
      1. Under the "model" name scope, create the model with
         `is_train=False`, load weights from the latest checkpoint found in
         `flags_obj.model_dir` (via `_load_weights_if_possible`), and print
         the model summary.
      2. Build a `tokenizer.Subtokenizer` from `flags_obj.vocab_file`.
      3. Build the eval dataset, keep only the first component of each
         element, and take the first `_SINGLE_SAMPLE` elements.
      4. Call `model.predict` and pass every predicted output to
         `translate.translate_from_input`.

    Nothing is returned.
    """
    params = self.params
    flags_obj = self.flags_obj

    with tf.name_scope("model"):
        model = transformer.create_model(params, is_train=False)
        latest_checkpoint = tf.train.latest_checkpoint(flags_obj.model_dir)
        self._load_weights_if_possible(model, latest_checkpoint)
        model.summary()

    subtokenizer = tokenizer.Subtokenizer(flags_obj.vocab_file)

    # Drop the second component of each element; only the first is fed to
    # `model.predict`.
    dataset = data_pipeline.eval_input_fn(params)
    dataset = dataset.map(lambda inputs, _: inputs)
    dataset = dataset.take(_SINGLE_SAMPLE)

    # `model.predict` yields a pair; only its first item is used.
    val_outputs, _ = model.predict(dataset)
    for output in val_outputs:
        translate.translate_from_input(output, subtokenizer)
def main(unused_argv):
    """Translate the text and/or file given on the command line.

    Reads `--text`, `--file`, `--file_out`, `--vocab_file`, `--param_set` and
    `--model_dir` from `FLAGS`. When neither `--text` nor `--file` is given a
    warning is logged and the function returns without doing anything else.
    Otherwise an Estimator is built with the decoding parameters overridden
    by this module's constants, and the text and/or file are translated.

    Args:
      unused_argv: Ignored.

    Raises:
      ValueError: If `--file` is given but the file does not exist.
    """
    # Imported inside the function, as in the original code.
    from official.transformer import transformer_main

    tf.logging.set_verbosity(tf.logging.INFO)

    has_text = FLAGS.text is not None
    has_file = FLAGS.file is not None
    if not (has_text or has_file):
        tf.logging.warn("Nothing to translate. Make sure to call this script using "
                        "flags --text or --file.")
        return

    subtokenizer = tokenizer.Subtokenizer(FLAGS.vocab_file)

    # Set up estimator and params. The decoding settings below overwrite the
    # corresponding entries of the selected parameter set in place.
    params = transformer_main.PARAMS_MAP[FLAGS.param_set]
    decode_overrides = (
        ("beam_size", _BEAM_SIZE),
        ("alpha", _ALPHA),
        ("extra_decode_length", _EXTRA_DECODE_LENGTH),
        ("batch_size", _DECODE_BATCH_SIZE),
    )
    for key, value in decode_overrides:
        params[key] = value

    estimator = tf.estimator.Estimator(
        model_fn=transformer_main.model_fn,
        model_dir=FLAGS.model_dir,
        params=params)

    if has_text:
        tf.logging.info("Translating text: %s" % FLAGS.text)
        translate_text(estimator, subtokenizer, FLAGS.text)

    if has_file:
        input_file = os.path.abspath(FLAGS.file)
        tf.logging.info("Translating file: %s" % input_file)
        # Existence is checked on the path exactly as given on the command
        # line, while the absolute path is what gets reported and translated.
        if not tf.gfile.Exists(FLAGS.file):
            raise ValueError("File does not exist: %s" % input_file)

        if FLAGS.file_out is None:
            output_file = None
        else:
            output_file = os.path.abspath(FLAGS.file_out)
            tf.logging.info("File output specified: %s" % output_file)

        translate_file(estimator, subtokenizer, input_file, output_file)