Example #1
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    output = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=output)

    logits = output

    # Calculate model loss.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params.label_smoothing, params.vocab_size)
    loss = tf.reduce_sum(xentropy * weights) / tf.reduce_sum(weights)

    if mode == tf.estimator.ModeKeys.EVAL:
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op = get_train_op(loss, params)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
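
For context, here is a minimal sketch of how a model_fn like this is typically wired into an Estimator, assuming the same imports as the example above. The input functions, model directory, and step count are hypothetical placeholders, not part of the original example:

def run_transformer(params, train_input_fn, eval_input_fn):
  # model_fn is the function defined above; params is forwarded to it
  # unchanged by the Estimator.
  estimator = tf.estimator.Estimator(
      model_fn=model_fn, model_dir="/tmp/transformer_model", params=params)
  estimator.train(input_fn=train_input_fn, steps=1000)
  return estimator.evaluate(input_fn=eval_input_fn)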
Example #2
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    with tf.variable_scope("model"):
        inputs, targets = features, labels

        # Create model and get output logits.
        model = transformer.Transformer(params,
                                        mode == tf.estimator.ModeKeys.TRAIN)

        logits = model(inputs, targets)

        # When in prediction mode, the labels/targets are None, and the model
        # output is the prediction.
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                tf.estimator.ModeKeys.PREDICT,
                predictions=logits,
                export_outputs={
                    "translate": tf.estimator.export.PredictOutput(logits)
                })

        # Explicitly set the shape of the logits for XLA (TPU). This is needed
        # because the logits are passed back to the host VM CPU for metric
        # evaluation, and the shape of [?, ?, vocab_size] is too vague. However,
        # it is known from Transformer that the first two dimensions of logits
        # are the dimensions of targets. Note that the ambiguous shape of logits
        # is not a problem when computing xentropy, because
        # padded_cross_entropy_loss resolves the shape on the TPU.
        logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

        # Calculate model loss.
        # xentropy contains the cross entropy loss of every nonpadding token in the
        # targets.
        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits, targets, params["label_smoothing"], params["vocab_size"])
        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        # Save loss as named tensor that will be logged with the logging hook.
        tf.identity(loss, "cross_entropy")

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={"predictions": logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))
        else:
            train_op, metric_dict = get_train_op_and_metrics(loss, params)

            # Epochs can be quite long. This gives some intermediate information
            # in TensorBoard.
            metric_dict["minibatch_loss"] = loss
            record_scalars(metric_dict)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)
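
record_scalars and get_train_op_and_metrics are defined elsewhere in this project. As a rough sketch of the former, assuming it simply emits one TensorBoard summary per entry of metric_dict (an assumption, not the project's actual code):

def record_scalars(metric_dict):
    # Assumed behavior: one scalar summary per named metric, so the values
    # appear in TensorBoard next to the loss.
    for key, value in metric_dict.items():
        tf.summary.scalar(name=key, tensor=value)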
Example #3
def model_fn(features, labels, mode, params):
    """Defines how to train, evaluate and predict from the transformer model."""
    if params.frozen_graph and mode == tf.estimator.ModeKeys.PREDICT:
        print("Reading***** From *** pb", flush=True)
        input_map = {'input_tokens': features}
        output_names = [
            'model/Transformer/strided_slice_15',
            'model/Transformer/strided_slice_16'
        ]

        with tf.io.gfile.GFile(params.frozen_graph, "rb") as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
        tf.graph_util.import_graph_def(graph_def,
                                       input_map=input_map,
                                       return_elements=output_names,
                                       name="")
        output_tensors = [
            tf.compat.v1.get_default_graph().get_tensor_by_name(name + ":0")
            for name in output_names
        ]
        output = {'outputs': output_tensors[0], 'scores': output_tensors[1]}
        return tf.estimator.EstimatorSpec(tf.estimator.ModeKeys.PREDICT,
                                          predictions=output)
    else:
        with tf.compat.v1.variable_scope("model"):
            inputs, targets = features, labels

            # Create model and get output logits.
            model = transformer.Transformer(
                params, mode == tf.estimator.ModeKeys.TRAIN)

            output = model(inputs, targets)

            # When in prediction mode, the labels/targets are None, and the
            # model output is the prediction.
            if mode == tf.estimator.ModeKeys.PREDICT:
                return tf.estimator.EstimatorSpec(
                    tf.estimator.ModeKeys.PREDICT, predictions=output)

            logits = output

            # Calculate model loss.
            xentropy, weights = metrics.padded_cross_entropy_loss(
                logits, targets, params.label_smoothing, params.vocab_size)
            loss = (tf.reduce_sum(input_tensor=xentropy * weights) /
                    tf.reduce_sum(input_tensor=weights))

            if mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    predictions={"predictions": logits},
                    eval_metric_ops=metrics.get_eval_metrics(
                        logits, labels, params))
            else:
                train_op = get_train_op(loss, params)
                logging_hook = tf.compat.v1.train.LoggingTensorHook(
                    {"loss": loss}, every_n_iter=FLAGS.print_iter)
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=[logging_hook])
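
For reference, a hedged sketch of how the frozen .pb consumed by the first branch might be produced. The session and output path are assumptions for illustration; the output node names come from the example itself:

def freeze_transformer(sess, output_path):
    # Fold variables into constants so the graph can be reloaded without a
    # checkpoint, then serialize it to a .pb file.
    frozen_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, [
            'model/Transformer/strided_slice_15',
            'model/Transformer/strided_slice_16',
        ])
    with tf.io.gfile.GFile(output_path, "wb") as f:
        f.write(frozen_graph_def.SerializeToString())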
Example #4
def model_fn(features, labels, mode: tf.estimator.ModeKeys, params: dict):
    """
    :param features:
                    encode_inputs = features['encode_feature_name']
    :param labels:
    :param mode:
    :param params:
    :return:
    """
    with tf.variable_scope('model'):
        inputs = features
        transformer = Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

        logits = transformer(inputs, labels)
        """
            when in prediction mode, the labels and decode_inputs  is None,
            the model output id the prediction
            it is a dict {"outputs": top_decoded_ids, "scores": top_scores}
        """

        if mode == tf.estimator.ModeKeys.PREDICT:
            estimator = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=logits,
                export_outputs={
                    'translate': tf.estimator.export.PredictOutput(logits)
                })

            return estimator

        logits.set_shape(labels.shape.as_list() + logits.shape.as_list()[2:])

        xentropy, weights = metrics.padded_cross_entropy_loss(
            logits=logits,
            labels=labels,
            smoothing=params.get('label_smoothing'),
            vocab_size=params.get('vocab_size'))

        loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

        tf.identity(loss, 'cross_entropy')

        if mode == tf.estimator.ModeKeys.EVAL:
            estimator = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                predictions={'predictions': logits},
                eval_metric_ops=metrics.get_eval_metrics(
                    logits, labels, params))

            return estimator

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op, metrics_dict = model_utils.get_train_op_and_metrics(
                loss, params)

            metrics_dict['mini_batch_loss'] = loss

            model_utils.record_scalars(metrics_dict)

            estimator = tf.estimator.EstimatorSpec(mode=mode,
                                                   loss=loss,
                                                   train_op=train_op)

            return estimator
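
Because the PREDICT branch declares a 'translate' export output, the trained Estimator can be exported as a SavedModel. A minimal sketch, assuming the encoder input is an int64 id matrix under the 'encode_feature_name' key mentioned in the docstring (on older TF 1.x releases the method is spelled export_savedmodel):

def export_for_serving(estimator, export_dir):
    # Hypothetical serving signature: a raw [batch, length] tensor of
    # token ids.
    features = {
        'encode_feature_name': tf.placeholder(tf.int64, [None, None]),
    }
    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        features)
    return estimator.export_saved_model(export_dir, serving_input_fn)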
Example #5
def model_fn(features, labels, mode, params):
  """Defines how to train, evaluate and predict from the transformer model."""
  with tf.variable_scope("model"):
    inputs, targets = features, labels

    # Create model and get output logits.
    model = transformer.Transformer(params, mode == tf.estimator.ModeKeys.TRAIN)

    # In predict mode, returns a dictionary:
    #   output: [batch_size, decoded_length]
    #   score: [batch_size, float]
    # Otherwise, returns a float32 tensor with shape
    # [batch_size, target_length, vocab_size].
    logits = model(inputs, targets)

    # When in prediction mode, the labels/targets are None, and the model
    # output is the prediction.
    if mode == tf.estimator.ModeKeys.PREDICT:
      if params["use_tpu"]:
        raise NotImplementedError("Prediction is not yet supported on TPUs.")
      return tf.estimator.EstimatorSpec(
          tf.estimator.ModeKeys.PREDICT,
          predictions=logits,
          export_outputs={
              "translate": tf.estimator.export.PredictOutput(logits)
          })

    # Explicitly set the shape of the logits for XLA (TPU). This is needed
    # because the logits are passed back to the host VM CPU for metric
    # evaluation, and the shape of [?, ?, vocab_size] is too vague. However,
    # it is known from Transformer that the first two dimensions of logits
    # are the dimensions of targets. Note that the ambiguous shape of logits
    # is not a problem when computing xentropy, because
    # padded_cross_entropy_loss resolves the shape on the TPU.
    logits.set_shape(targets.shape.as_list() + logits.shape.as_list()[2:])

    # Calculate model loss.
    # xentropy contains the cross entropy loss of every nonpadding token in the
    # targets.
    # During training, the loss weights for positions whose label is 0
    # (i.e. <PAD>) are set to 0.
    xentropy, weights = metrics.padded_cross_entropy_loss(
        logits, targets, params["label_smoothing"], params["vocab_size"])
    loss = tf.reduce_sum(xentropy) / tf.reduce_sum(weights)

    # Save loss as named tensor that will be logged with the logging hook.
    tf.identity(loss, "cross_entropy")

    if mode == tf.estimator.ModeKeys.EVAL:
      if params["use_tpu"]:
        # host call functions should only have tensors as arguments.
        # This lambda pre-populates params so that metric_fn is
        # TPUEstimator compliant.
        def metric_fn(logits, labels):
          return metrics.get_eval_metrics(logits, labels, params=params)
        eval_metrics = (metric_fn, [logits, labels])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, predictions={"predictions": logits},
            eval_metrics=eval_metrics)
      return tf.estimator.EstimatorSpec(
          mode=mode, loss=loss, predictions={"predictions": logits},
          eval_metric_ops=metrics.get_eval_metrics(logits, labels, params))
    else:
      train_op, metric_dict = get_train_op_and_metrics(loss, params)

      # Epochs can be quite long. This gives some intermediate information
      # in TensorBoard.
      metric_dict["minibatch_loss"] = loss
      if params["use_tpu"]:
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op,
            host_call=tpu_util.construct_scalar_host_call(
                metric_dict=metric_dict, model_dir=params["model_dir"],
                prefix="training/")
        )
      record_scalars(metric_dict)
      return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
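
When params["use_tpu"] is set, a model_fn like this one is driven by a TPUEstimator rather than a plain Estimator. A rough sketch of that wiring under TF 1.x contrib, with the cluster resolver and batch sizes as hypothetical placeholders:

def build_tpu_estimator(params, tpu_cluster_resolver):
  # TPUEstimator injects params["batch_size"] itself, so batch sizes are
  # passed here rather than through params.
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      model_dir=params["model_dir"],
      tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=100))
  return tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      config=run_config,
      use_tpu=params["use_tpu"],
      train_batch_size=64,
      eval_batch_size=64,
      params=params)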