Example No. 1
def score_dataset(model, dataset, print_params=None, output_file=None):
    """Outputs the model scores for the dataset.

    Args:
      model: A :class:`opennmt.models.Model` instance.
      dataset: A ``tf.data.Dataset`` instance outputting parallel features and
        labels.
      print_params: A dictionary of parameters passed to
        :meth:`opennmt.models.Model.print_score`.
      output_file: If set, outputs are saved in this file, otherwise they are
        printed on the standard output.
    """
    if output_file:
        stream = open(output_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    write_fn = lambda batch: (model.print_score(
        batch, params=print_params, stream=stream))
    index_fn = lambda batch: batch.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    score_fn = tf.function(model.score, input_signature=dataset.element_spec)
    for features, labels in dataset:
        results = score_fn(features, labels)
        results = tf.nest.map_structure(lambda t: t.numpy(), results)
        for batch in misc.extract_batches(results):
            ordered_writer.push(batch)

    if output_file:
        stream.close()
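
A minimal usage sketch for score_dataset, assuming an already initialized OpenNMT-tf model and an evaluation dataset built by its examples_inputter; the file paths, batch size and data_config contents below are placeholders, not values taken from the example above.

# Hypothetical usage; vocabulary paths, data files and batch size are placeholders.
data_config = {
    "source_vocabulary": "src-vocab.txt",
    "target_vocabulary": "tgt-vocab.txt",
}
model.initialize(data_config)
dataset = model.examples_inputter.make_evaluation_dataset(
    "src-test.txt", "tgt-test.txt", 32)
score_dataset(model, dataset, output_file="scores.txt")
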
Example No. 2
  def score(self, features_file, predictions_file, checkpoint_path=None, output_file=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.
      output_file: The file where the scores are saved. Otherwise, they will be
        printed on the standard output.
    """
    checkpoint, config = self._init_run()
    checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
    model = checkpoint.model
    score_config = config["score"]
    dataset = model.examples_inputter.make_evaluation_dataset(
        features_file,
        predictions_file,
        score_config["batch_size"],
        prefetch_buffer_size=score_config.get("prefetch_buffer_size"))

    if output_file:
      stream = io.open(output_file, encoding="utf-8", mode="w")
    else:
      stream = sys.stdout

    score_fn = tf.function(model.score, input_signature=dataset.element_spec)
    for features, labels in dataset:
      results = score_fn(features, labels)
      results = tf.nest.map_structure(lambda t: t.numpy(), results)
      for batch in misc.extract_batches(results):
        model.print_score(batch, params=score_config, stream=stream)

    if output_file:
      stream.close()
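
For context, a hedged sketch of how this method is typically reached through a runner object; the constructor arguments and file names are placeholders rather than anything defined in the example above.

# Hypothetical call site; model, config and paths are placeholders.
runner = Runner(model, config, auto_config=True)
runner.score("test.src.txt", "test.hyp.txt", output_file="scores.txt")
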
Example No. 3
 def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
     predictions, self._current_step = run_values.results
     self._output_path = "{}.{}".format(self._output_file,
                                        self._current_step)
     with open(self._output_path, "a") as output_file:
         for prediction in misc.extract_batches(predictions):
             self._model.print_prediction(prediction, stream=output_file)
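
The hook above relies on misc.extract_batches to split a dictionary of batched arrays into per-example dictionaries. The standalone sketch below illustrates that behavior with plain NumPy arrays standing in for model outputs (the arrays and the module import are assumptions for illustration only).

import numpy as np
from opennmt.utils import misc

# One dictionary per batch entry is yielded.
batched = {"ids": np.array([[1, 2], [3, 4]]), "length": np.array([2, 2])}
for example in misc.extract_batches(batched):
    print(example["ids"], example["length"])
# Prints: [1 2] 2, then [3 4] 2
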
Example No. 4
    def __call__(self, step):
        """Runs the evaluator.

        Args:
          step: The current training step.

        Returns:
          A dictionary of evaluation metrics.
        """
        tf.get_logger().info("Running evaluation for step %d", step)
        output_file = None
        output_path = None
        if self._save_predictions:
            output_path = os.path.join(self._eval_dir,
                                       "predictions.txt.%d" % step)
            output_file = tf.io.gfile.GFile(output_path, "w")

        loss_num = 0
        loss_den = 0
        metrics = self._model.get_metrics()
        for source, target in self._dataset:
            loss, predictions = self._eval_fn(source, target)
            if isinstance(loss, tuple):
                loss_num += loss[0]
                loss_den += loss[1]
            else:
                loss_num += loss
                loss_den += 1
            if metrics:
                self._model.update_metrics(metrics, predictions, target)
            if output_file is not None:
                predictions = {
                    k: v.numpy()
                    for k, v in six.iteritems(predictions)
                }
                for prediction in misc.extract_batches(predictions):
                    self._model.print_prediction(prediction,
                                                 stream=output_file)
        if loss_den == 0:
            raise RuntimeError("No examples were evaluated")
        loss = loss_num / loss_den

        results = dict(loss=loss, perplexity=tf.math.exp(loss))
        if metrics:
            for name, metric in six.iteritems(metrics):
                results[name] = metric.result()
        if self._save_predictions:
            tf.get_logger().info("Evaluation predictions saved to %s",
                                 output_path)
            output_file.close()
            for scorer in self._scorers:
                score = scorer(self._labels_file, output_path)
                if isinstance(score, dict):
                    results.update(score)
                else:
                    results[scorer.name] = score

        return self._record_results(step, results)
Example No. 5
    def infer_list(self,
                   features_list: List[List[str]],
                   checkpoint_path: Optional[str] = None) -> List[List[str]]:
        config = self._finalize_config()
        model: Model = self._init_model(config)
        checkpoint = Checkpoint.from_config(config, model)
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        infer_config = config["infer"]
        dataset = make_inference_dataset(
            model,
            features_list,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
        )

        infer_fn = tf.function(model.infer,
                               input_signature=(dataset.element_spec, ))
        if not tf.config.functions_run_eagerly():
            tf.get_logger().info(
                "Tracing and optimizing the inference graph...")
            infer_fn.get_concrete_function()  # Trace the function now.

        results: List[List[str]] = [[""]] * len(features_list[0])
        for source in dataset:
            predictions = infer_fn(source)
            predictions = tf.nest.map_structure(lambda t: t.numpy(),
                                                predictions)
            for prediction in extract_batches(predictions):
                index: int = prediction["index"]
                num_hypotheses = len(prediction["log_probs"])
                hypotheses: List[str] = []
                for i in range(num_hypotheses):
                    if "tokens" in prediction:
                        target_length = prediction["length"][i]
                        tokens = prediction["tokens"][i][:target_length]
                        sentence = model.labels_inputter.tokenizer.detokenize(
                            tokens)
                    else:
                        sentence = prediction["text"][i]
                    hypotheses.append(sentence)
                results[index] = hypotheses
        return results
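
A hedged usage sketch for infer_list; the nested list mirrors the multi-source layout of the features_list parameter (one inner list of sentences per source), and whether the sentences must be pre-tokenized depends on the configured inputter, which is an assumption here.

# Hypothetical call; the source sentences are placeholders.
source_sentences = ["Hello world .", "How are you ?"]
hypotheses = runner.infer_list([source_sentences])
for hyps in hypotheses:
    print(hyps[0])  # best hypothesis for each input line
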
Example No. 6
    def infer_multiple(
        self,
        features_paths: List[Union[str, List[str]]],
        predictions_paths: List[str],
        checkpoint_path: Optional[str] = None,
    ) -> None:
        config = self._finalize_config()
        model: Model = self._init_model(config)
        checkpoint = Checkpoint.from_config(config, model)
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        infer_config = config["infer"]
        for features_path, predictions_path in zip(features_paths,
                                                   predictions_paths):
            dataset = model.examples_inputter.make_inference_dataset(
                features_path,
                infer_config["batch_size"],
                length_bucket_width=infer_config["length_bucket_width"],
                prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
            )

            with open(predictions_path, encoding="utf-8", mode="w") as stream:
                infer_fn = tf.function(
                    model.infer, input_signature=(dataset.element_spec, ))
                if not tf.config.functions_run_eagerly():
                    tf.get_logger().info(
                        "Tracing and optimizing the inference graph...")
                    infer_fn.get_concrete_function()  # Trace the function now.

                # Inference might return out-of-order predictions. The OrderRestorer utility is
                # used to write predictions in their original order.
                ordered_writer = OrderRestorer(
                    lambda pred: pred.get("index"),
                    lambda pred: (model.print_prediction(
                        pred, params=infer_config, stream=stream)),
                )

                for source in dataset:
                    predictions = infer_fn(source)
                    predictions = tf.nest.map_structure(
                        lambda t: t.numpy(), predictions)
                    for prediction in extract_batches(predictions):
                        ordered_writer.push(prediction)
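
The comment in the example motivates the OrderRestorer pattern; the sketch below illustrates the behavior it provides, with plain dictionaries standing in for prediction batches and print used as the write callback.

from opennmt.utils.misc import OrderRestorer

# OrderRestorer buffers out-of-order items and flushes them through the
# callback in index order.
writer = OrderRestorer(lambda pred: pred["index"],
                       lambda pred: print(pred["text"]))
writer.push({"index": 1, "text": "second"})  # buffered, nothing written yet
writer.push({"index": 0, "text": "first"})   # writes "first" then "second"
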
Example No. 7
    def score(self,
              features_file,
              predictions_file,
              checkpoint_path=None,
              output_file=None):
        """Scores existing predictions.

        Args:
          features_file: The input file.
          predictions_file: The predictions file to score.
          checkpoint_path: Path of a specific checkpoint to use. If ``None``,
            the latest is used.
          output_file: The file where the scores are saved. Otherwise, they will be
            printed on the standard output.
        """
        checkpoint, config = self._init_run()
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        model = checkpoint.model
        score_config = config["score"]
        dataset = model.examples_inputter.make_evaluation_dataset(
            features_file,
            predictions_file,
            score_config["batch_size"],
            prefetch_buffer_size=score_config.get("prefetch_buffer_size"))

        @dataset_util.function_on_next(dataset, as_numpy=True)
        def _score(next_fn):
            features, labels = next_fn()
            return model.score(features, labels)

        if output_file:
            stream = io.open(output_file, encoding="utf-8", mode="w")
        else:
            stream = sys.stdout

        for results in _score():  # pylint: disable=no-value-for-parameter
            for batch in misc.extract_batches(results):
                model.print_score(batch, params=score_config, stream=stream)

        if output_file:
            stream.close()
Example No. 8
 def _testGenericModel(self,
                       model,
                       mode,
                       features_file,
                       labels_file=None,
                       data_config=None,
                       batch_size=16,
                       prediction_heads=None,
                       metrics=None,
                       params=None):
   # Mainly test that the code does not throw.
   if params is None:
     params = model.auto_config()["params"]
   if data_config is None:
     data_config = {}
   model.initialize(data_config, params=params)
   model.create_variables()
   # Build a dataset for mode.
   if mode == tf.estimator.ModeKeys.PREDICT:
     dataset = model.examples_inputter.make_inference_dataset(
         features_file, batch_size)
   elif mode == tf.estimator.ModeKeys.EVAL:
     dataset = model.examples_inputter.make_evaluation_dataset(
         features_file, labels_file, batch_size)
   elif mode == tf.estimator.ModeKeys.TRAIN:
     dataset = model.examples_inputter.make_training_dataset(
         features_file, labels_file, batch_size)
   # Forward first batch into the model.
   data = next(iter(dataset))
   if mode != tf.estimator.ModeKeys.PREDICT:
     features, labels = data
   else:
     features, labels = data, None
   training = mode == tf.estimator.ModeKeys.TRAIN
   outputs, predictions = model(features, labels=labels, training=training)
   if mode != tf.estimator.ModeKeys.PREDICT:
     loss = model.compute_loss(outputs, labels, training=training)
     if mode == tf.estimator.ModeKeys.EVAL:
       # Check that returned evaluation metrics are expected.
       eval_metrics = model.get_metrics()
       if eval_metrics is not None:
         model.update_metrics(eval_metrics, predictions, labels)
         for metric in metrics:
           self.assertIn(metric, eval_metrics)
       try:
         # Check that scores can be computed and printed without errors.
         scores = model.score(features, labels)
         first_score = tf.nest.map_structure(
             lambda x: x.numpy(),
             next(misc.extract_batches(scores)))
         with open(os.devnull, "w") as devnull:
           model.print_score(first_score, stream=devnull)
       except NotImplementedError:
         pass
   else:
     # Check that all prediction heads are returned.
     self.assertIsInstance(predictions, dict)
     if prediction_heads is not None:
       for head in prediction_heads:
         self.assertIn(head, predictions)
     # Check that the prediction can be printed without errors.
     first_prediction = tf.nest.map_structure(
         lambda x: x.numpy(),
         next(misc.extract_batches(predictions)))
     with open(os.devnull, "w") as devnull:
       model.print_prediction(first_prediction, stream=devnull)
Example No. 9
    def _analyze(
            self,
            inputs_list: List[lit_types.JsonDict]) -> List[lit_types.JsonDict]:
        features_list: List[str] = list(
            map(lambda input: encode_sp(self.src_spp, input["src_text"]),
                inputs_list))
        infer_config: dict = self.config["infer"]
        dataset = make_inference_dataset(
            self.model,
            features_list,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"),
        )

        if self._analyze_fn is None:
            self._analyze_fn = tf.function(
                self.model.analyze, input_signature=(dataset.element_spec, ))
            if not tf.config.functions_run_eagerly():
                tf.get_logger().info(
                    "Tracing and optimizing the analyze graph...")
                self._analyze_fn.get_concrete_function(
                )  # Trace the function now.

        results: List[lit_types.JsonDict] = [None] * len(features_list)
        for features in dataset:
            predictions = self._analyze_fn(features)

            top_k_probs, top_k_ids = tf.nn.top_k(tf.nn.softmax(
                predictions["logits"]),
                                                 k=10)
            del predictions["logits"]
            predictions["top_k_probs"] = top_k_probs
            predictions["top_k_ids"] = top_k_ids

            masks = tf.sequence_mask(features["length"],
                                     maxlen=tf.shape(features["ids"])[1])
            predictions["encoder_final_embedding"] = masked_token_mean(
                predictions["encoder_outputs"], masks)
            del predictions["encoder_outputs"]

            predictions = tf.nest.map_structure(lambda t: t.numpy(),
                                                predictions)
            for prediction in extract_batches(predictions):
                index: int = prediction["index"]
                target_length = prediction["length"]
                trg_tokens = prediction["tokens"][:target_length]
                tok_trg_text = self.model.labels_inputter.tokenizer.detokenize(
                    trg_tokens)
                trg_text = decode_sp(tok_trg_text)
                attention = prediction["alignment"][:target_length]
                probs = prediction["top_k_probs"]
                ids = prediction["top_k_ids"]
                pred_tokens = list(
                    self._convert_top_k(ids, probs, target_length))
                encoder_final_embedding = prediction["encoder_final_embedding"]
                ref_text = inputs_list[index]["ref_text"]
                tok_ref_text = encode_sp(self.trg_spp, ref_text)
                ter_score = sacrebleu.sentence_ter(tok_trg_text,
                                                   [tok_ref_text])
                chrf_score = sacrebleu.sentence_chrf(trg_text, [ref_text],
                                                     order=3)
                results[index] = {
                    "trg_tokens": [t.decode("utf-8") for t in trg_tokens],
                    "trg_text": trg_text,
                    "attention": np.expand_dims(attention, axis=0),
                    "src_tokens": features_list[index].split(),
                    "pred_tokens": pred_tokens,
                    "encoder_final_embedding": encoder_final_embedding,
                    "ter": ter_score.score,
                    "chrf3": chrf_score.score,
                }
        return results
Example No. 10
  def score(self, features_file, predictions_file, checkpoint_path=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.

    Raises:
      ValueError: if no checkpoint is found or if the model is not a sequence to
        sequence model.
    """
    if not hasattr(self._model, "target_inputter"):
      raise ValueError("scoring only works for sequence to sequence models")

    if checkpoint_path is None:
      checkpoint_path = tf.train.latest_checkpoint(self._estimator.model_dir)
    elif os.path.isdir(checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint_path is None:
      raise ValueError("could not find a trained model in %s" % self._estimator.model_dir)

    if "score" not in self._config:
      self._config["score"] = {}
    batch_size = self._config["score"].get("batch_size", 64)
    input_fn = self._model.input_fn(
        tf.estimator.ModeKeys.EVAL,
        batch_size,
        self._config["data"],
        features_file,
        labels_file=predictions_file,
        num_threads=self._config["score"].get("num_threads"),
        prefetch_buffer_size=self._config["score"].get("prefetch_buffer_size"))

    with tf.Graph().as_default() as g:
      tf.train.create_global_step(g)
      features, labels = input_fn()
      with tf.variable_scope(self._model.name):
        logits, _ = self._model(
            features,
            labels,
            self._estimator.params,
            tf.estimator.ModeKeys.EVAL)

      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=labels["ids_out"])
      weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
      masked_cross_entropy = cross_entropy * weights
      scores = (tf.reduce_sum(masked_cross_entropy, axis=1) /
                tf.cast(labels["length"], cross_entropy.dtype))
      results = {
          "score": scores,
          "tokens": labels["tokens"],
          "length": labels["length"] - 1  # For -1, see sequence_to_sequence.shift_target_sequence.
      }

      with tf.train.MonitoredSession(
          session_creator=tf.train.ChiefSessionCreator(
              checkpoint_filename_with_path=checkpoint_path,
              config=self._estimator.config.session_config)) as sess:
        while not sess.should_stop():
          for batch in extract_batches(sess.run(results)):
            tokens = batch["tokens"][:batch["length"]]
            sentence = self._model.target_inputter.tokenizer.detokenize(tokens)
            fmt = "%f ||| %s" % (batch["score"], sentence)
            print_bytes(tf.compat.as_bytes(fmt))
Example No. 11
    def infer(self,
              features_file,
              predictions_file=None,
              checkpoint_path=None,
              log_time=False):
        """Runs inference.

        Args:
          features_file: The file(s) to infer from.
          predictions_file: If set, predictions are saved in this file.
          checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
            the latest is used.
          log_time: If ``True``, several time metrics will be printed in the logs at
            the end of the inference loop.
        """
        checkpoint, config = self._init_run()
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        model = checkpoint.model
        infer_config = config["infer"]
        dataset = model.examples_inputter.make_inference_dataset(
            features_file,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

        @dataset_util.function_on_next(dataset, as_numpy=True)
        def _predict(next_fn):
            source = next_fn()
            return model.infer(source)

        if predictions_file:
            stream = io.open(predictions_file, encoding="utf-8", mode="w")
        else:
            stream = sys.stdout

        ordered_writer = None
        write_fn = lambda prediction: (model.print_prediction(
            prediction, params=infer_config, stream=stream))

        total_time = 0
        total_tokens = 0
        total_examples = 0
        start_time = time.time()

        for predictions in _predict():  # pylint: disable=no-value-for-parameter
            end_time = time.time()
            if log_time:
                total_time += end_time - start_time
                batch_size = next(six.itervalues(predictions)).shape[0]
                total_examples += batch_size
                length = predictions.get("length")
                if length is not None:
                    if len(length.shape) == 2:
                        length = length[:, 0]
                    total_tokens += sum(length)
            for prediction in misc.extract_batches(predictions):
                if "index" in prediction:
                    if ordered_writer is None:
                        ordered_writer = misc.OrderRestorer(
                            index_fn=lambda prediction: prediction["index"],
                            callback_fn=write_fn)
                    ordered_writer.push(prediction)
                else:
                    write_fn(prediction)
            start_time = time.time()

        if log_time:
            tf.get_logger().info("Total prediction time (s): %f", total_time)
            tf.get_logger().info("Average prediction time (s): %f",
                                 total_time / total_examples)
            if total_tokens > 0:
                tf.get_logger().info("Tokens per second: %f",
                                     total_tokens / total_time)
        if predictions_file:
            stream.close()
Example No. 12
    def infer(self,
              features_file,
              predictions_file=None,
              checkpoint_path=None,
              log_time=False):
        """Runs inference.

        Args:
          features_file: The file(s) to infer from.
          predictions_file: If set, predictions are saved in this file.
          checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
            the latest is used.
          log_time: If ``True``, several time metrics will be printed in the logs at
            the end of the inference loop.
        """
        config = self._finalize_config()
        model = self._init_model(config)
        checkpoint = checkpoint_util.Checkpoint.from_config(config, model)
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        infer_config = config["infer"]
        dataset = model.examples_inputter.make_inference_dataset(
            features_file,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

        if predictions_file:
            stream = open(predictions_file, encoding="utf-8", mode="w")
        else:
            stream = sys.stdout

        infer_fn = tf.function(model.infer,
                               input_signature=(dataset.element_spec, ))
        tf.get_logger().info("Tracing and optimizing the inference graph...")
        infer_fn.get_concrete_function()  # Trace the function now.

        # Inference might return out-of-order predictions. The OrderRestorer utility is
        # used to write predictions in their original order.
        write_fn = lambda prediction: (model.print_prediction(
            prediction, params=infer_config, stream=stream))
        index_fn = lambda prediction: prediction.get("index")
        ordered_writer = misc.OrderRestorer(index_fn, write_fn)

        total_time = 0
        total_tokens = 0
        total_examples = 0
        start_time = time.time()

        # When the inference dataset is bucketized, it can happen that no output is
        # written in a long time. To avoid confusion and give the impression that
        # the process is stuck, we ensure that something is logged regularly.
        max_time_without_output = 10
        last_output_time = start_time

        for source in dataset:
            predictions = infer_fn(source)
            predictions = tf.nest.map_structure(lambda t: t.numpy(),
                                                predictions)
            batch_time = time.time()

            for prediction in misc.extract_batches(predictions):
                written = ordered_writer.push(prediction)
                if written:
                    last_output_time = batch_time
                else:
                    time_without_output = batch_time - last_output_time
                    if time_without_output >= max_time_without_output:
                        tf.get_logger().info(
                            "%d predictions are buffered, but waiting for the prediction of "
                            "line %d to advance the output...",
                            ordered_writer.buffer_size,
                            ordered_writer.next_index + 1)
                        last_output_time = batch_time

            if log_time:
                batch_size = next(iter(predictions.values())).shape[0]
                total_examples += batch_size
                length = predictions.get("length")
                if length is not None:
                    if len(length.shape) == 2:
                        length = length[:, 0]
                    total_tokens += sum(length)

        if log_time:
            end_time = time.time()
            total_time = end_time - start_time
            tf.get_logger().info("Total prediction time (s): %f", total_time)
            tf.get_logger().info("Average prediction time (s): %f",
                                 total_time / total_examples)
            if total_tokens > 0:
                tf.get_logger().info("Tokens per second: %f",
                                     total_tokens / total_time)
        if predictions_file:
            stream.close()
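
A hedged call-site sketch for the infer method above; how the runner is constructed is outside the example, and the paths are placeholders.

# Hypothetical usage; paths are placeholders. Without predictions_file,
# the hypotheses would go to standard output instead.
runner.infer("test.src.txt", predictions_file="test.hyp.txt", log_time=True)
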
Example No. 13
  def score(self, features_file, predictions_file, checkpoint_path=None):
    """Scores existing predictions.

    Args:
      features_file: The input file.
      predictions_file: The predictions file to score.
      checkpoint_path: Path of a specific checkpoint to use. If ``None``,
        the latest is used.

    Raises:
      ValueError: if no checkpoint is found or if the model is not a sequence to
        sequence model.
    """
    if not hasattr(self._model, "target_inputter"):
      raise ValueError("scoring only works for sequence to sequence models")

    if checkpoint_path is None:
      checkpoint_path = tf.train.latest_checkpoint(self._config["model_dir"])
    elif tf.gfile.IsDirectory(checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint_path is None:
      raise ValueError("could not find a trained model in %s" % self._config["model_dir"])

    input_fn = self._model.input_fn(
        tf.estimator.ModeKeys.EVAL,
        self._config["score"]["batch_size"],
        self._config["data"],
        features_file,
        labels_file=predictions_file,
        num_threads=self._config["score"].get("num_threads"),
        prefetch_buffer_size=self._config["score"].get("prefetch_buffer_size"))

    with tf.Graph().as_default() as g:
      tf.train.create_global_step(g)
      features, labels = input_fn()
      labels["alignment"] = None  # Add alignment key to force the model to return attention.
      with tf.variable_scope(self._model.name):
        outputs, _ = self._model(
            features,
            labels,
            self._config["params"],
            tf.estimator.ModeKeys.EVAL)

      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=outputs["logits"], labels=labels["ids_out"])
      weights = tf.sequence_mask(labels["length"], dtype=cross_entropy.dtype)
      masked_cross_entropy = cross_entropy * weights
      scores = tf.reduce_sum(masked_cross_entropy, axis=1)
      results = {
          "attention": outputs["attention"],
          "cross_entropy": cross_entropy,
          "score": scores,
          "tokens": labels["tokens"],
          "length": labels["length"] - 1  # For -1, see sequence_to_sequence.shift_target_sequence.
      }

      with tf.train.MonitoredSession(
          session_creator=tf.train.ChiefSessionCreator(
              checkpoint_filename_with_path=checkpoint_path,
              config=self._session_config)) as sess:
        while not sess.should_stop():
          for batch in misc.extract_batches(sess.run(results)):
            tokens = batch["tokens"][:batch["length"]]
            sentence = self._model.target_inputter.tokenizer.detokenize(tokens)
            token_level_scores = None
            if self._config["score"].get("with_token_level"):
              token_level_scores = batch["cross_entropy"][:batch["length"]]
            alignment_type = self._config["score"].get("with_alignments")
            sentence = format_translation_output(
                sentence,
                score=batch["score"],
                token_level_scores=token_level_scores,
                attention=batch["attention"][:batch["length"]],
                alignment_type=alignment_type)
            misc.print_bytes(tf.compat.as_bytes(sentence))
Example No. 14
def predict_dataset(model,
                    dataset,
                    print_params=None,
                    predictions_file=None,
                    log_time=False):
    """Outputs the model predictions for the dataset.

    To run inference on strings directly, see
    :meth:`opennmt.models.Model.serve_function`.

    Args:
      model: A :class:`opennmt.models.Model` instance.
      dataset: A ``tf.data.Dataset`` instance outputting features.
      print_params: A dictionary of parameters passed to
        :meth:`opennmt.models.Model.print_prediction`.
      predictions_file: If set, predictions are saved in this file, otherwise they
        are printed on the standard output.
      log_time: If ``True``, several time metrics will be printed in the logs at
        the end of the inference loop.
    """
    if predictions_file:
        stream = open(predictions_file, encoding="utf-8", mode="w")
    else:
        stream = sys.stdout

    infer_fn = tf.function(model.infer,
                           input_signature=(dataset.element_spec, ))
    if not tf.config.functions_run_eagerly():
        tf.get_logger().info("Tracing and optimizing the inference graph...")
        infer_fn.get_concrete_function()  # Trace the function now.

    # Inference might return out-of-order predictions. The OrderRestorer utility is
    # used to write predictions in their original order.
    write_fn = lambda prediction: (model.print_prediction(
        prediction, params=print_params, stream=stream))
    index_fn = lambda prediction: prediction.get("index")
    ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    total_time = 0
    total_tokens = 0
    total_examples = 0
    start_time = time.time()

    # When the inference dataset is bucketized, it can happen that no output is
    # written in a long time. To avoid confusion and give the impression that
    # the process is stuck, we ensure that something is logged regularly.
    max_time_without_output = 10
    last_output_time = start_time

    for features in dataset:
        predictions = infer_fn(features)
        predictions = tf.nest.map_structure(lambda t: t.numpy(), predictions)
        batch_time = time.time()

        for prediction in misc.extract_batches(predictions):
            written = ordered_writer.push(prediction)
            if written:
                last_output_time = batch_time
            else:
                time_without_output = batch_time - last_output_time
                if time_without_output >= max_time_without_output:
                    tf.get_logger().info(
                        "%d predictions are buffered, but waiting for the prediction of "
                        "line %d to advance the output...",
                        ordered_writer.buffer_size,
                        ordered_writer.next_index + 1,
                    )
                    last_output_time = batch_time

        if log_time:
            batch_size = next(iter(predictions.values())).shape[0]
            total_examples += batch_size
            length = predictions.get("length")
            if length is not None:
                if len(length.shape) == 2:
                    length = length[:, 0]
                total_tokens += sum(length)

    if log_time:
        end_time = time.time()
        total_time = end_time - start_time
        tf.get_logger().info("Total prediction time (s): %f", total_time)
        tf.get_logger().info("Average prediction time (s): %f",
                             total_time / total_examples)
        if total_tokens > 0:
            tf.get_logger().info("Tokens per second: %f",
                                 total_tokens / total_time)
    if predictions_file:
        stream.close()
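
A minimal sketch of calling predict_dataset, assuming a model whose examples_inputter can build the inference dataset; the path and batch size are placeholders.

# Hypothetical usage; "test.src.txt" and the batch size are placeholders.
dataset = model.examples_inputter.make_inference_dataset("test.src.txt", 32)
predict_dataset(model, dataset, predictions_file="test.hyp.txt", log_time=True)
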
Example No. 15
 def after_run(self, run_context, run_values):  # pylint: disable=unused-argument
   predictions, self._current_step = run_values.results
   self._output_path = "{}.{}".format(self._output_file, self._current_step)
   with open(self._output_path, "a") as output_file:
     for prediction in misc.extract_batches(predictions):
       self._model.print_prediction(prediction, stream=output_file)
Example No. 16
    def __call__(self, step):
        """Runs the evaluator.

        Args:
          step: The current training step.

        Returns:
          A dictionary of evaluation metrics.
        """
        tf.get_logger().info("Running evaluation for step %d", step)
        output_file = None
        output_path = None
        if self._save_predictions:
            output_path = os.path.join(self._eval_dir,
                                       "predictions.txt.%d" % step)
            output_file = tf.io.gfile.GFile(output_path, "w")
            params = {"n_best": 1}
            write_fn = lambda prediction: (self._model.print_prediction(
                prediction, params=params, stream=output_file))
            index_fn = lambda prediction: prediction.get("index")
            ordered_writer = misc.OrderRestorer(index_fn, write_fn)

        loss_num = 0
        loss_den = 0
        metrics = self._model.get_metrics()
        for source, target in self._dataset:
            loss, predictions = self._eval_fn(source, target)
            if isinstance(loss, tuple):
                loss_num += loss[0]
                loss_den += loss[1]
            else:
                loss_num += loss
                loss_den += 1
            if metrics:
                self._model.update_metrics(metrics, predictions, target)
            if output_file is not None:
                predictions = {k: v.numpy() for k, v in predictions.items()}
                for prediction in misc.extract_batches(predictions):
                    ordered_writer.push(prediction)
        if loss_den == 0:
            raise RuntimeError("No examples were evaluated")
        loss = loss_num / loss_den

        results = dict(loss=loss, perplexity=tf.math.exp(loss))
        if metrics:
            for name, metric in metrics.items():
                results[name] = metric.result()
        if self._save_predictions:
            tf.get_logger().info("Evaluation predictions saved to %s",
                                 output_path)
            output_file.close()
            for scorer in self._scorers:
                score = scorer(self._labels_file, output_path)
                if isinstance(score, dict):
                    results.update(score)
                else:
                    results[scorer.name] = score

        for name, value in results.items():
            if isinstance(value, tf.Tensor):
                results[name] = value.numpy()

        self._record_results(step, results)
        self._maybe_export(step, results)
        self._maybe_garbage_collect_exports()
        return results
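
The evaluator is callable, so a training loop would typically invoke it at evaluation time; a minimal sketch, assuming an evaluator instance has already been constructed with its model, dataset and evaluation directory.

# Hypothetical invocation; `evaluator` and `step` come from the surrounding
# training loop and are not defined in the example above.
metrics = evaluator(step)
tf.get_logger().info("Validation loss = %f, perplexity = %f",
                     metrics["loss"], metrics["perplexity"])
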
Example No. 17
    def score(self,
              features_file,
              predictions_file,
              checkpoint_path=None,
              output_file=None):
        """Scores existing predictions.

        Args:
          features_file: The input file.
          predictions_file: The predictions file to score.
          checkpoint_path: Path of a specific checkpoint to use. If ``None``,
            the latest is used.
          output_file: The file where the scores are saved. Otherwise, they will be
            printed on the standard output.

        Raises:
          ValueError: if no checkpoint is found or if the model is not a sequence
            to sequence or language model.
        """
        if not isinstance(self._model,
                          (models.LanguageModel, models.SequenceToSequence)):
            raise ValueError(
                "scoring only works for sequence to sequence or language models"
            )

        if checkpoint_path is None:
            checkpoint_path = tf.train.latest_checkpoint(
                self._config["model_dir"])
        elif tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        if checkpoint_path is None:
            raise ValueError("could not find a trained model in %s" %
                             self._config["model_dir"])

        model = copy.deepcopy(self._model)
        with tf.Graph().as_default():
            dataset = model.examples_inputter.make_evaluation_dataset(
                features_file,
                predictions_file,
                self._config["score"]["batch_size"],
                num_threads=self._config["score"].get("num_threads"),
                prefetch_buffer_size=self._config["score"].get(
                    "prefetch_buffer_size"))
            iterator = dataset.make_initializable_iterator()
            features, labels = iterator.get_next()
            labels[
                "alignment"] = None  # Add alignment key to force the model to return attention.
            outputs, _ = model(features, labels, self._config["params"],
                               tf.estimator.ModeKeys.EVAL)

            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=outputs["logits"], labels=labels["ids_out"])
            weights = tf.sequence_mask(labels["length"],
                                       dtype=cross_entropy.dtype)
            masked_cross_entropy = cross_entropy * weights
            scores = (tf.reduce_sum(masked_cross_entropy, axis=1) /
                      tf.cast(labels["length"], cross_entropy.dtype))
            results = {
                "cross_entropy": cross_entropy,
                "score": scores,
                "tokens": labels["tokens"],
                "length": labels["length"] - 1  # -1 for the special token.
            }
            if "attention" in outputs:
                results["attention"] = outputs["attention"]

            if output_file:
                stream = io.open(output_file, encoding="utf-8", mode="w")
            else:
                stream = sys.stdout

            output_tokenizer = (self._model.labels_inputter.tokenizer
                                if not self._model.unsupervised else
                                self._model.features_inputter.tokenizer)
            with tf.train.MonitoredSession(
                    session_creator=tf.train.ChiefSessionCreator(
                        checkpoint_filename_with_path=checkpoint_path,
                        config=self._session_config)) as sess:
                sess.run(iterator.initializer)
                while not sess.should_stop():
                    for batch in misc.extract_batches(sess.run(results)):
                        tokens = batch["tokens"][:batch["length"]]
                        sentence = output_tokenizer.detokenize(tokens)
                        token_level_scores = None
                        attention = None
                        if self._config["score"].get("with_token_level"):
                            token_level_scores = batch[
                                "cross_entropy"][:batch["length"]]
                        if "attention" in batch:
                            attention = batch["attention"][:batch["length"]]
                        alignment_type = self._config["score"].get(
                            "with_alignments")
                        sentence = format_translation_output(
                            sentence,
                            score=batch["score"],
                            token_level_scores=token_level_scores,
                            attention=attention,
                            alignment_type=alignment_type)
                        misc.print_bytes(tf.compat.as_bytes(sentence),
                                         stream=stream)

            if output_file:
                stream.close()
Example No. 18
    def infer(self,
              features_file,
              predictions_file=None,
              checkpoint_path=None,
              log_time=False):
        """Runs inference.

        Args:
          features_file: The file(s) to infer from.
          predictions_file: If set, predictions are saved in this file.
          checkpoint_path: Path of a specific checkpoint to predict. If ``None``,
            the latest is used.
          log_time: If ``True``, several time metrics will be printed in the logs at
            the end of the inference loop.
        """
        checkpoint, config = self._init_run()
        checkpoint.restore(checkpoint_path=checkpoint_path, weights_only=True)
        model = checkpoint.model
        infer_config = config["infer"]
        dataset = model.examples_inputter.make_inference_dataset(
            features_file,
            infer_config["batch_size"],
            length_bucket_width=infer_config["length_bucket_width"],
            prefetch_buffer_size=infer_config.get("prefetch_buffer_size"))

        if predictions_file:
            stream = open(predictions_file, encoding="utf-8", mode="w")
        else:
            stream = sys.stdout

        infer_fn = tf.function(model.infer,
                               input_signature=(dataset.element_spec, ))
        infer_fn.get_concrete_function()  # Trace the function now.

        # Inference might return out-of-order predictions. The OrderRestorer utility is
        # used to write predictions in their original order.
        write_fn = lambda prediction: (model.print_prediction(
            prediction, params=infer_config, stream=stream))
        index_fn = lambda prediction: prediction.get("index")
        ordered_writer = misc.OrderRestorer(index_fn, write_fn)

        total_time = 0
        total_tokens = 0
        total_examples = 0
        start_time = time.time()

        for source in dataset:
            predictions = infer_fn(source)
            predictions = tf.nest.map_structure(lambda t: t.numpy(),
                                                predictions)
            for prediction in misc.extract_batches(predictions):
                ordered_writer.push(prediction)
            if log_time:
                batch_size = next(iter(predictions.values())).shape[0]
                total_examples += batch_size
                length = predictions.get("length")
                if length is not None:
                    if len(length.shape) == 2:
                        length = length[:, 0]
                    total_tokens += sum(length)

        if log_time:
            end_time = time.time()
            total_time = end_time - start_time
            tf.get_logger().info("Total prediction time (s): %f", total_time)
            tf.get_logger().info("Average prediction time (s): %f",
                                 total_time / total_examples)
            if total_tokens > 0:
                tf.get_logger().info("Tokens per second: %f",
                                     total_tokens / total_time)
        if predictions_file:
            stream.close()