def score_dataset(model, dataset, print_params=None, output_file=None):
    """Outputs the model scores for the dataset.

    Args:
      model: A :class:`opennmt.models.Model` instance.
      dataset: A ``tf.data.Dataset`` instance outputting parallel features and
        labels.
      print_params: A dictionary of parameters passed to
        :meth:`opennmt.models.Model.print_score`.
      output_file: If set, outputs are saved in this file, otherwise they are
        printed on the standard output.
    """
    if output_file:
        stream = open(output_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    write_fn = lambda batch: (
        model.print_score(batch, params=print_params, stream=stream))
    index_fn = lambda batch: batch.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    score_fn = tf.function(model.score, input_signature=dataset.element_spec)
    for features, labels in dataset:
        results = score_fn(features, labels)
        results = tf.nest.map_structure(lambda t: t.numpy(), results)
        for batch in misc.extract_batches(results):
            ordered_writer.push(batch)

    if output_file:
        stream.close()
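# A minimal usage sketch for score_dataset defined above, assuming `model` is an
# already restored opennmt.models.Model; "src.txt" and "tgt.txt" are hypothetical
# parallel files and the dataset is built with the standard examples_inputter API.
dataset = model.examples_inputter.make_evaluation_dataset(
    "src.txt",   # hypothetical source file
    "tgt.txt",   # hypothetical translations to score
    batch_size=32)
score_dataset(model, dataset, output_file="scores.txt")  # writes one score per input line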
def score(self, features_file, predictions_file, checkpoint_path=None, output_file=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.
      output_file: If set, scores are saved in this file, otherwise they are
        printed on the standard output.
    """
    checkpoint, config = self._init_run()
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    model = checkpoint.model
    score_config = config["score"]
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        predictions_file,
        score_config["batch_size"],
        prefetch_buffer_size=score_config.get("prefetch_buffer_size"))

    if output_file:
        stream = io.open(output_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    score_fn = tf.function(model.score, input_signature=dataset.element_spec)
    for features, labels in dataset:
        results = score_fn(features, labels)
        results = tf.nest.map_structure(lambda t: t.numpy(), results)
        for batch in misc.extract_batches(results):
            model.print_score(batch, params=score_config, stream=stream)

    if output_file:
        stream.close()
def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
    predictions, self._current_step = run_values.results
    self._output_path = "{}.{}".format(self._output_file, self._current_step)
    with open(self._output_path, "a") as output_file:
        for prediction in misc.extract_batches(predictions):
            self._model.print_prediction(prediction, stream=output_file)
def __call__(self, step):
    """Runs the evaluator.

    Args:
      step: The current training step.

    Returns:
      A dictionary of evaluation metrics.
    """
    tf.get_logger().info("Running evaluation for step %d", step)
    output_file = None
    output_path = None
    if self._save_predictions:
        output_path = os.path.join(self._eval_dir, "predictions.txt.%d" % step)
        output_file = tf.io.gfile.GFile(output_path, "w")

    loss_num = 0
    loss_den = 0
    metrics = self._model.get_metrics()
    for source, target in self._dataset:
        loss, predictions = self._eval_fn(source, target)
        if isinstance(loss, tuple):
            loss_num += loss[0]
            loss_den += loss[1]
        else:
            loss_num += loss
            loss_den += 1
        if metrics:
            self._model.update_metrics(metrics, predictions, target)
        if output_file is not None:
            predictions = {k: v.numpy() for k, v in six.iteritems(predictions)}
            for prediction in misc.extract_batches(predictions):
                self._model.print_prediction(prediction, stream=output_file)
    if loss_den == 0:
        raise RuntimeError("No examples were evaluated")
    loss = loss_num / loss_den

    results = dict(loss=loss, perplexity=tf.math.exp(loss))
    if metrics:
        for name, metric in six.iteritems(metrics):
            results[name] = metric.result()
    if self._save_predictions:
        tf.get_logger().info("Evaluation predictions saved to %s", output_path)
        output_file.close()
        for scorer in self._scorers:
            score = scorer(self._labels_file, output_path)
            if isinstance(score, dict):
                results.update(score)
            else:
                results[scorer.name] = score
    return self._record_results(step, results)
def infer_list(self, features_list: List[List[str]], checkpoint_path: Optional[str] = None) -> List[List[str]]:
    config = self._finalize_config()
    model: Model = self._init_model(config)
    checkpoint = Checkpoint.from_config(config, model)
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    infer_config = config["infer"]
    dataset = make_inference_dataset(
        model,
        features_list,
        infer_config["batch_size"],
        length_bucket_width=infer_config["length_bucket_width"],
        prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
    )

    infer_fn = tf.function(model.infer, input_signature=(dataset.element_spec,))
    if not tf.config.functions_run_eagerly():
        tf.get_logger().info("Tracing and optimizing the inference graph...")
        infer_fn.get_concrete_function()  # Trace the function now.

    results: List[List[str]] = [[""]] * len(features_list[0])
    for source in dataset:
        predictions = infer_fn(source)
        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        for prediction in extract_batches(predictions):
            index: int = prediction["index"]
            num_hypotheses = len(prediction["log_probs"])
            hypotheses: List[str] = []
            for i in range(num_hypotheses):
                if "tokens" in prediction:
                    target_length = prediction["length"][i]
                    tokens = prediction["tokens"][i][:target_length]
                    sentence = model.labels_inputter.tokenizer.detokenize(tokens)
                else:
                    sentence = prediction["text"][i]
                hypotheses.append(sentence)
            results[index] = hypotheses
    return results
def infer_multiple(
    self,
    features_paths: List[Union[str, List[str]]],
    predictions_paths: List[str],
    checkpoint_path: Optional[str] = None,
) -> None:
    config = self._finalize_config()
    model: Model = self._init_model(config)
    checkpoint = Checkpoint.from_config(config, model)
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    infer_config = config["infer"]
    for features_path, predictions_path in zip(features_paths, predictions_paths):
        dataset = model.examples_inputter.make_inference_dataset(
            features_path,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
        )

        with open(predictions_path, encoding="utf-8", mode="w") as stream:
            infer_fn = tf.function(model.infer, input_signature=(dataset.element_spec,))
            if not tf.config.functions_run_eagerly():
                tf.get_logger().info("Tracing and optimizing the inference graph...")
                infer_fn.get_concrete_function()  # Trace the function now.

            # Inference might return out-of-order predictions. The OrderRestorer utility is
            # used to write predictions in their original order.
            ordered_writer = OrderRestorer(
                lambda pred: pred.get("index"),
                lambda pred: (model.print_prediction(pred, params=infer_config, stream=stream)),
            )

            for source in dataset:
                predictions = infer_fn(source)
                predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
                for prediction in extract_batches(predictions):
                    ordered_writer.push(prediction)
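# A small self-contained sketch of the OrderRestorer pattern used above: with
# length bucketing, predictions can come back out of order, and push() only fires
# the write callback once the next expected "index" is available.
from opennmt.utils import misc

writer = misc.OrderRestorer(
    lambda pred: pred.get("index"),     # where the original position is stored
    lambda pred: print(pred["text"]))   # invoked only when the order is restored

for pred in [{"index": 1, "text": "B"}, {"index": 0, "text": "A"}, {"index": 2, "text": "C"}]:
    writer.push(pred)  # prints "A", "B", "C" in the original input order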
def score(self, features_file, predictions_file, checkpoint_path=None, output_file=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.
      output_file: If set, scores are saved in this file, otherwise they are
        printed on the standard output.
    """
    checkpoint, config = self._init_run()
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    model = checkpoint.model
    score_config = config["score"]
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        predictions_file,
        score_config["batch_size"],
        prefetch_buffer_size=score_config.get("prefetch_buffer_size"))

    @dataset_util.function_on_next(dataset, as_numpy=True)
    def _score(next_fn):
        features, labels = next_fn()
        return model.score(features, labels)

    if output_file:
        stream = io.open(output_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    for results in _score():  # pylint: disable=no-value-for-parameter
        for batch in misc.extract_batches(results):
            model.print_score(batch, params=score_config, stream=stream)

    if output_file:
        stream.close()
def _testGenericModel(self,
                      model,
                      mode,
                      features_file,
                      labels_file=None,
                      data_config=None,
                      batch_size=16,
                      prediction_heads=None,
                      metrics=None,
                      params=None):
    # Mainly test that the code does not throw.
    if params is None:
        params = model.auto_config()["params"]
    if data_config is None:
        data_config = {}
    model.initialize(data_config, params=params)
    model.create_variables()
    # Build a dataset for mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        dataset = model.examples_inputter.make_inference_dataset(
            features_file, batch_size)
    elif mode == tf.estimator.ModeKeys.EVAL:
        dataset = model.examples_inputter.make_evaluation_dataset(
            features_file, labels_file, batch_size)
    elif mode == tf.estimator.ModeKeys.TRAIN:
        dataset = model.examples_inputter.make_training_dataset(
            features_file, labels_file, batch_size)
    # Forward first batch into the model.
    data = iter(dataset).next()
    if mode != tf.estimator.ModeKeys.PREDICT:
        features, labels = data
    else:
        features, labels = data, None
    training = mode == tf.estimator.ModeKeys.TRAIN
    outputs, predictions = model(features, labels=labels, training=training)
    if mode != tf.estimator.ModeKeys.PREDICT:
        loss = model.compute_loss(outputs, labels, training=training)
        if mode == tf.estimator.ModeKeys.EVAL:
            # Check that returned evaluation metrics are expected.
            eval_metrics = model.get_metrics()
            if eval_metrics is not None:
                model.update_metrics(eval_metrics, predictions, labels)
                for metric in metrics:
                    self.assertIn(metric, eval_metrics)
            try:
                # Check that scores can be computed and printed without errors.
                scores = model.score(features, labels)
                first_score = tf.nest.map_structure(
                    lambda x: x.numpy(),
                    next(misc.extract_batches(scores)))
                with open(os.devnull, "w") as devnull:
                    model.print_score(first_score, stream=devnull)
            except NotImplementedError:
                pass
    else:
        # Check that all prediction heads are returned.
        self.assertIsInstance(predictions, dict)
        if prediction_heads is not None:
            for head in prediction_heads:
                self.assertIn(head, predictions)
        # Check that the prediction can be printed without errors.
        first_prediction = tf.nest.map_structure(
            lambda x: x.numpy(),
            next(misc.extract_batches(predictions)))
        with open(os.devnull, "w") as devnull:
            model.print_prediction(first_prediction, stream=devnull)
def _analyze(self, inputs_list: List[lit_types.JsonDict]) -> List[lit_types.JsonDict]:
    features_list: List[str] = list(
        map(lambda input: encode_sp(self.src_spp, input["src_text"]), inputs_list))
    infer_config: dict = self.config["infer"]
    dataset = make_inference_dataset(
        self.model,
        features_list,
        infer_config["batch_size"],
        length_bucket_width=infer_config["length_bucket_width"],
        prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
    )

    if self._analyze_fn is None:
        self._analyze_fn = tf.function(
            self.model.analyze, input_signature=(dataset.element_spec,))
        if not tf.config.functions_run_eagerly():
            tf.get_logger().info("Tracing and optimizing the analyze graph...")
            self._analyze_fn.get_concrete_function()  # Trace the function now.

    results: List[lit_types.JsonDict] = [None] * len(features_list)
    for features in dataset:
        predictions = self._analyze_fn(features)

        top_k_probs, top_k_ids = tf.nn.top_k(tf.nn.softmax(predictions["logits"]), k=10)
        del predictions["logits"]
        predictions["top_k_probs"] = top_k_probs
        predictions["top_k_ids"] = top_k_ids

        masks = tf.sequence_mask(features["length"], maxlen=tf.shape(features["ids"])[1])
        predictions["encoder_final_embedding"] = masked_token_mean(
            predictions["encoder_outputs"], masks)
        del predictions["encoder_outputs"]

        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        for prediction in extract_batches(predictions):
            index: int = prediction["index"]
            target_length = prediction["length"]
            trg_tokens = prediction["tokens"][:target_length]
            tok_trg_text = self.model.labels_inputter.tokenizer.detokenize(trg_tokens)
            trg_text = decode_sp(tok_trg_text)
            attention = prediction["alignment"][:target_length]
            probs = prediction["top_k_probs"]
            ids = prediction["top_k_ids"]
            pred_tokens = list(self._convert_top_k(ids, probs, target_length))
            encoder_final_embedding = prediction["encoder_final_embedding"]
            ref_text = inputs_list[index]["ref_text"]
            tok_ref_text = encode_sp(self.trg_spp, ref_text)
            ter_score = sacrebleu.sentence_ter(tok_trg_text, [tok_ref_text])
            chrf_score = sacrebleu.sentence_chrf(trg_text, [ref_text], order=3)
            results[index] = {
                "trg_tokens": [t.decode("utf-8") for t in trg_tokens],
                "trg_text": trg_text,
                "attention": np.expand_dims(attention, axis=0),
                "src_tokens": features_list[index].split(),
                "pred_tokens": pred_tokens,
                "encoder_final_embedding": encoder_final_embedding,
                "ter": ter_score.score,
                "chrf3": chrf_score.score,
            }
    return results
def score(self, features_file, predictions_file, checkpoint_path=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.

    Raises:
      ValueError: if no checkpoints are found or if the model is not a sequence to
        sequence model.
    """
    if not hasattr(self._model, "target_inputter"):
        raise ValueError("scoring only works for sequence to sequence models")

    if checkpoint_path is None:
        checkpoint_path = tf.train.latest_checkpoint(self._estimator.model_dir)
    elif os.path.isdir(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint_path is None:
        raise ValueError("could not find a trained model in %s" % self._estimator.model_dir)

    if "score" not in self._config:
        self._config["score"] = {}
    batch_size = self._config["score"].get("batch_size", 64)
    input_fn = self._model.input_fn(
        tf.estimator.ModeKeys.EVAL,
        batch_size,
        self._config["data"],
        features_file,
        labels_file=predictions_file,
        num_threads=self._config["score"].get("num_threads"),
        prefetch_buffer_size=self._config["score"].get("prefetch_buffer_size"))

    with tf.Graph().as_default() as g:
        tf.train.create_global_step(g)
        features, labels = input_fn()
        with tf.variable_scope(self._model.name):
            logits, _ = self._model(
                features, labels, self._estimator.params, tf.estimator.ModeKeys.EVAL)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels["ids_out"])
        weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
        masked_cross_entropy = cross_entropy * weights
        scores = (tf.reduce_sum(masked_cross_entropy, axis=1) /
                  tf.cast(labels["length"], cross_entropy.dtype))
        results = {
            "score": scores,
            "tokens": labels["tokens"],
            "length": labels["length"] - 1  # For -1, see sequence_to_sequence.shift_target_sequence.
        }

        with tf.train.MonitoredSession(
                session_creator=tf.train.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    config=self._estimator.config.session_config)) as sess:
            while not sess.should_stop():
                for batch in extract_batches(sess.run(results)):
                    tokens = batch["tokens"][:batch["length"]]
                    sentence = self._model.target_inputter.tokenizer.detokenize(tokens)
                    fmt = "%f ||| %s" % (batch["score"], sentence)
                    print_bytes(tf.compat.as_bytes(fmt))
def infer(self, features_file, predictions_file=None, checkpoint_path=None, log_time=False):
    """Runs inference.

    Args:
      features_file: The file(s) to infer from.
      predictions_file: If set, predictions are saved in this file.
      checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
        the latest is used.
      log_time: If ``True``, several time metrics will be printed in the logs at
        the end of the inference loop.
    """
    checkpoint, config = self._init_run()
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    model = checkpoint.model
    infer_config = config["infer"]
    dataset = model.examples_inputter.make_inference_dataset(
        features_file,
        infer_config["batch_size"],
        length_bucket_width=infer_config["length_bucket_width"],
        prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

    @dataset_util.function_on_next(dataset, as_numpy=True)
    def _predict(next_fn):
        source = next_fn()
        return model.infer(source)

    if predictions_file:
        stream = io.open(predictions_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    ordered_writer = None
    write_fn = lambda prediction: (
        model.print_prediction(prediction, params=infer_config, stream=stream))

    total_time = 0
    total_tokens = 0
    total_examples = 0
    start_time = time.time()

    for predictions in _predict():  # pylint: disable=no-value-for-parameter
        end_time = time.time()
        if log_time:
            total_time += end_time - start_time
            batch_size = next(six.itervalues(predictions)).shape[0]
            total_examples += batch_size
            length = predictions.get("length")
            if length is not None:
                if len(length.shape) == 2:
                    length = length[:, 0]
                total_tokens += sum(length)
        for prediction in misc.extract_batches(predictions):
            if "index" in prediction:
                if ordered_writer is None:
                    ordered_writer = misc.OrderRestorer(
                        index_fn=lambda prediction: prediction["index"],
                        callback_fn=write_fn)
                ordered_writer.push(prediction)
            else:
                write_fn(prediction)
        start_time = time.time()

    if log_time:
        tf.get_logger().info("Total prediction time (s): %f", total_time)
        tf.get_logger().info("Average prediction time (s): %f", total_time / total_examples)
        if total_tokens > 0:
            tf.get_logger().info("Tokens per second: %f", total_tokens / total_time)
    if predictions_file:
        stream.close()
def infer(self, features_file, predictions_file=None, checkpoint_path=None, log_time=False):
    """Runs inference.

    Args:
      features_file: The file(s) to infer from.
      predictions_file: If set, predictions are saved in this file.
      checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
        the latest is used.
      log_time: If ``True``, several time metrics will be printed in the logs at
        the end of the inference loop.
    """
    config = self._finalize_config()
    model = self._init_model(config)
    checkpoint = checkpoint_util.Checkpoint.from_config(config, model)
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    infer_config = config["infer"]
    dataset = model.examples_inputter.make_inference_dataset(
        features_file,
        infer_config["batch_size"],
        length_bucket_width=infer_config["length_bucket_width"],
        prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

    if predictions_file:
        stream = open(predictions_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    infer_fn = tf.function(model.infer, input_signature=(dataset.element_spec,))
    tf.get_logger().info("Tracing and optimizing the inference graph...")
    infer_fn.get_concrete_function()  # Trace the function now.

    # Inference might return out-of-order predictions. The OrderRestorer utility is
    # used to write predictions in their original order.
    write_fn = lambda prediction: (
        model.print_prediction(prediction, params=infer_config, stream=stream))
    index_fn = lambda prediction: prediction.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    total_time = 0
    total_tokens = 0
    total_examples = 0
    start_time = time.time()

    # When the inference dataset is bucketized, it can happen that no output is
    # written in a long time. To avoid giving the impression that the process is
    # stuck, we ensure that something is logged regularly.
    max_time_without_output = 10
    last_output_time = start_time

    for source in dataset:
        predictions = infer_fn(source)
        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        batch_time = time.time()
        for prediction in misc.extract_batches(predictions):
            written = ordered_writer.push(prediction)
            if written:
                last_output_time = batch_time
            else:
                time_without_output = batch_time - last_output_time
                if time_without_output >= max_time_without_output:
                    tf.get_logger().info(
                        "%d predictions are buffered, but waiting for the prediction of "
                        "line %d to advance the output...",
                        ordered_writer.buffer_size,
                        ordered_writer.next_index + 1)
                    last_output_time = batch_time
        if log_time:
            batch_size = next(iter(predictions.values())).shape[0]
            total_examples += batch_size
            length = predictions.get("length")
            if length is not None:
                if len(length.shape) == 2:
                    length = length[:, 0]
                total_tokens += sum(length)

    if log_time:
        end_time = time.time()
        total_time = end_time - start_time
        tf.get_logger().info("Total prediction time (s): %f", total_time)
        tf.get_logger().info("Average prediction time (s): %f", total_time / total_examples)
        if total_tokens > 0:
            tf.get_logger().info("Tokens per second: %f", total_tokens / total_time)
    if predictions_file:
        stream.close()
def score(self, features_file, predictions_file, checkpoint_path=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.

    Raises:
      ValueError: if no checkpoints are found or if the model is not a sequence to
        sequence model.
    """
    if not hasattr(self._model, "target_inputter"):
        raise ValueError("scoring only works for sequence to sequence models")

    if checkpoint_path is None:
        checkpoint_path = tf.train.latest_checkpoint(self._config["model_dir"])
    elif tf.gfile.IsDirectory(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint_path is None:
        raise ValueError("could not find a trained model in %s" % self._config["model_dir"])

    input_fn = self._model.input_fn(
        tf.estimator.ModeKeys.EVAL,
        self._config["score"]["batch_size"],
        self._config["data"],
        features_file,
        labels_file=predictions_file,
        num_threads=self._config["score"].get("num_threads"),
        prefetch_buffer_size=self._config["score"].get("prefetch_buffer_size"))

    with tf.Graph().as_default() as g:
        tf.train.create_global_step(g)
        features, labels = input_fn()
        labels["alignment"] = None  # Add alignment key to force the model to return attention.
        with tf.variable_scope(self._model.name):
            outputs, _ = self._model(
                features, labels, self._config["params"], tf.estimator.ModeKeys.EVAL)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=outputs["logits"], labels=labels["ids_out"])
        weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
        masked_cross_entropy = cross_entropy * weights
        scores = tf.reduce_sum(masked_cross_entropy, axis=1)
        results = {
            "attention": outputs["attention"],
            "cross_entropy": cross_entropy,
            "score": scores,
            "tokens": labels["tokens"],
            "length": labels["length"] - 1  # For -1, see sequence_to_sequence.shift_target_sequence.
        }

        with tf.train.MonitoredSession(
                session_creator=tf.train.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    config=self._session_config)) as sess:
            while not sess.should_stop():
                for batch in misc.extract_batches(sess.run(results)):
                    tokens = batch["tokens"][:batch["length"]]
                    sentence = self._model.target_inputter.tokenizer.detokenize(tokens)
                    token_level_scores = None
                    if self._config["score"].get("with_token_level"):
                        token_level_scores = batch["cross_entropy"][:batch["length"]]
                    alignment_type = self._config["score"].get("with_alignments")
                    sentence = format_translation_output(
                        sentence,
                        score=batch["score"],
                        token_level_scores=token_level_scores,
                        attention=batch["attention"][:batch["length"]],
                        alignment_type=alignment_type)
                    misc.print_bytes(tf.compat.as_bytes(sentence))
def predict_dataset(model, dataset, print_params=None, predictions_file=None, log_time=False):
    """Outputs the model predictions for the dataset.

    To run inference on strings directly, see
    :meth:`opennmt.models.Model.serve_function`.

    Args:
      model: A :class:`opennmt.models.Model` instance.
      dataset: A ``tf.data.Dataset`` instance outputting features.
      print_params: A dictionary of parameters passed to
        :meth:`opennmt.models.Model.print_prediction`.
      predictions_file: If set, predictions are saved in this file, otherwise they
        are printed on the standard output.
      log_time: If ``True``, several time metrics will be printed in the logs at
        the end of the inference loop.
    """
    if predictions_file:
        stream = open(predictions_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    infer_fn = tf.function(model.infer, input_signature=(dataset.element_spec,))
    if not tf.config.functions_run_eagerly():
        tf.get_logger().info("Tracing and optimizing the inference graph...")
        infer_fn.get_concrete_function()  # Trace the function now.

    # Inference might return out-of-order predictions. The OrderRestorer utility is
    # used to write predictions in their original order.
    write_fn = lambda prediction: (
        model.print_prediction(prediction, params=print_params, stream=stream))
    index_fn = lambda prediction: prediction.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    total_time = 0
    total_tokens = 0
    total_examples = 0
    start_time = time.time()

    # When the inference dataset is bucketized, it can happen that no output is
    # written in a long time. To avoid giving the impression that the process is
    # stuck, we ensure that something is logged regularly.
    max_time_without_output = 10
    last_output_time = start_time

    for features in dataset:
        predictions = infer_fn(features)
        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        batch_time = time.time()
        for prediction in misc.extract_batches(predictions):
            written = ordered_writer.push(prediction)
            if written:
                last_output_time = batch_time
            else:
                time_without_output = batch_time - last_output_time
                if time_without_output >= max_time_without_output:
                    tf.get_logger().info(
                        "%d predictions are buffered, but waiting for the prediction of "
                        "line %d to advance the output...",
                        ordered_writer.buffer_size,
                        ordered_writer.next_index + 1,
                    )
                    last_output_time = batch_time
        if log_time:
            batch_size = next(iter(predictions.values())).shape[0]
            total_examples += batch_size
            length = predictions.get("length")
            if length is not None:
                if len(length.shape) == 2:
                    length = length[:, 0]
                total_tokens += sum(length)

    if log_time:
        end_time = time.time()
        total_time = end_time - start_time
        tf.get_logger().info("Total prediction time (s): %f", total_time)
        tf.get_logger().info("Average prediction time (s): %f", total_time / total_examples)
        if total_tokens > 0:
            tf.get_logger().info("Tokens per second: %f", total_tokens / total_time)
    if predictions_file:
        stream.close()
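# A minimal usage sketch for predict_dataset defined above, assuming `model` is a
# restored opennmt.models.Model; "test.src" and "test.out" are hypothetical paths
# and {"n_best": 1} is a standard print_prediction parameter.
dataset = model.examples_inputter.make_inference_dataset("test.src", batch_size=32)
predict_dataset(
    model,
    dataset,
    print_params={"n_best": 1},
    predictions_file="test.out",
    log_time=True)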
def __call__(self, step):
    """Runs the evaluator.

    Args:
      step: The current training step.

    Returns:
      A dictionary of evaluation metrics.
    """
    tf.get_logger().info("Running evaluation for step %d", step)
    output_file = None
    output_path = None
    if self._save_predictions:
        output_path = os.path.join(self._eval_dir, "predictions.txt.%d" % step)
        output_file = tf.io.gfile.GFile(output_path, "w")
        params = {"n_best": 1}
        write_fn = lambda prediction: (
            self._model.print_prediction(prediction, params=params, stream=output_file))
        index_fn = lambda prediction: prediction.get("index")
        ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    loss_num = 0
    loss_den = 0
    metrics = self._model.get_metrics()
    for source, target in self._dataset:
        loss, predictions = self._eval_fn(source, target)
        if isinstance(loss, tuple):
            loss_num += loss[0]
            loss_den += loss[1]
        else:
            loss_num += loss
            loss_den += 1
        if metrics:
            self._model.update_metrics(metrics, predictions, target)
        if output_file is not None:
            predictions = {k: v.numpy() for k, v in predictions.items()}
            for prediction in misc.extract_batches(predictions):
                ordered_writer.push(prediction)
    if loss_den == 0:
        raise RuntimeError("No examples were evaluated")
    loss = loss_num / loss_den

    results = dict(loss=loss, perplexity=tf.math.exp(loss))
    if metrics:
        for name, metric in metrics.items():
            results[name] = metric.result()
    if self._save_predictions:
        tf.get_logger().info("Evaluation predictions saved to %s", output_path)
        output_file.close()
        for scorer in self._scorers:
            score = scorer(self._labels_file, output_path)
            if isinstance(score, dict):
                results.update(score)
            else:
                results[scorer.name] = score

    for name, value in results.items():
        if isinstance(value, tf.Tensor):
            results[name] = value.numpy()
    self._record_results(step, results)
    self._maybe_export(step, results)
    self._maybe_garbage_collect_exports()
    return results
def score(self, features_file, predictions_file, checkpoint_path=None, output_file=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.
      output_file: If set, scores are saved in this file, otherwise they are
        printed on the standard output.

    Raises:
      ValueError: if no checkpoints are found or if the model is not a sequence to
        sequence or language model.
    """
    if not isinstance(self._model, (models.LanguageModel, models.SequenceToSequence)):
        raise ValueError("scoring only works for sequence to sequence or language models")

    if checkpoint_path is None:
        checkpoint_path = tf.train.latest_checkpoint(self._config["model_dir"])
    elif tf.gfile.IsDirectory(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint_path is None:
        raise ValueError("could not find a trained model in %s" % self._config["model_dir"])

    model = copy.deepcopy(self._model)
    with tf.Graph().as_default():
        dataset = model.examples_inputter.make_evaluation_dataset(
            features_file,
            predictions_file,
            self._config["score"]["batch_size"],
            num_threads=self._config["score"].get("num_threads"),
            prefetch_buffer_size=self._config["score"].get("prefetch_buffer_size"))
        iterator = dataset.make_initializable_iterator()
        features, labels = iterator.get_next()
        labels["alignment"] = None  # Add alignment key to force the model to return attention.
        outputs, _ = model(
            features, labels, self._config["params"], tf.estimator.ModeKeys.EVAL)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=outputs["logits"], labels=labels["ids_out"])
        weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
        masked_cross_entropy = cross_entropy * weights
        scores = (tf.reduce_sum(masked_cross_entropy, axis=1) /
                  tf.cast(labels["length"], cross_entropy.dtype))
        results = {
            "cross_entropy": cross_entropy,
            "score": scores,
            "tokens": labels["tokens"],
            "length": labels["length"] - 1  # -1 for the special token.
        }
        if "attention" in outputs:
            results["attention"] = outputs["attention"]

        if output_file:
            stream = io.open(output_file, encoding="utf-8", mode="w")
        else:
            stream = sys.stdout

        output_tokenizer = (
            self._model.labels_inputter.tokenizer if not self._model.unsupervised
            else self._model.features_inputter.tokenizer)
        with tf.train.MonitoredSession(
                session_creator=tf.train.ChiefSessionCreator(
                    checkpoint_filename_with_path=checkpoint_path,
                    config=self._session_config)) as sess:
            sess.run(iterator.initializer)
            while not sess.should_stop():
                for batch in misc.extract_batches(sess.run(results)):
                    tokens = batch["tokens"][:batch["length"]]
                    sentence = output_tokenizer.detokenize(tokens)
                    token_level_scores = None
                    attention = None
                    if self._config["score"].get("with_token_level"):
                        token_level_scores = batch["cross_entropy"][:batch["length"]]
                    if "attention" in batch:
                        attention = batch["attention"][:batch["length"]]
                    alignment_type = self._config["score"].get("with_alignments")
                    sentence = format_translation_output(
                        sentence,
                        score=batch["score"],
                        token_level_scores=token_level_scores,
                        attention=attention,
                        alignment_type=alignment_type)
                    misc.print_bytes(tf.compat.as_bytes(sentence), stream=stream)

    if output_file:
        stream.close()
def infer(self, features_file, predictions_file=None, checkpoint_path=None, log_time=False):
    """Runs inference.

    Args:
      features_file: The file(s) to infer from.
      predictions_file: If set, predictions are saved in this file.
      checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
        the latest is used.
      log_time: If ``True``, several time metrics will be printed in the logs at
        the end of the inference loop.
    """
    checkpoint, config = self._init_run()
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    model = checkpoint.model
    infer_config = config["infer"]
    dataset = model.examples_inputter.make_inference_dataset(
        features_file,
        infer_config["batch_size"],
        length_bucket_width=infer_config["length_bucket_width"],
        prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

    if predictions_file:
        stream = open(predictions_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    infer_fn = tf.function(model.infer, input_signature=(dataset.element_spec,))
    infer_fn.get_concrete_function()  # Trace the function now.

    # Inference might return out-of-order predictions. The OrderRestorer utility is
    # used to write predictions in their original order.
    write_fn = lambda prediction: (
        model.print_prediction(prediction, params=infer_config, stream=stream))
    index_fn = lambda prediction: prediction.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    total_time = 0
    total_tokens = 0
    total_examples = 0
    start_time = time.time()

    for source in dataset:
        predictions = infer_fn(source)
        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        for prediction in misc.extract_batches(predictions):
            ordered_writer.push(prediction)
        if log_time:
            batch_size = next(iter(predictions.values())).shape[0]
            total_examples += batch_size
            length = predictions.get("length")
            if length is not None:
                if len(length.shape) == 2:
                    length = length[:, 0]
                total_tokens += sum(length)

    if log_time:
        end_time = time.time()
        total_time = end_time - start_time
        tf.get_logger().info("Total prediction time (s): %f", total_time)
        tf.get_logger().info("Average prediction time (s): %f", total_time / total_examples)
        if total_tokens > 0:
            tf.get_logger().info("Tokens per second: %f", total_tokens / total_time)
    if predictions_file:
        stream.close()