def _create_model(params, is_train):
    """Builds the seq2seq transformer Keras model for training or inference."""

    # Hyperparameters shared verbatim by the encoder and decoder stacks.
    layer_config = {
        "num_layers": params["num_hidden_layers"],
        "num_attention_heads": params["num_heads"],
        "intermediate_size": params["filter_size"],
        "activation": "relu",
        "dropout_rate": params["relu_dropout"],
        "attention_dropout_rate": params["attention_dropout"],
        "use_bias": False,
        "norm_first": True,
        "norm_epsilon": 1e-6,
        "intermediate_dropout": params["relu_dropout"],
    }
    encoder = models.TransformerEncoder(**layer_config)
    decoder = models.TransformerDecoder(**layer_config)

    seq2seq_config = {
        "vocab_size": params["vocab_size"],
        "embedding_width": params["hidden_size"],
        "dropout_rate": params["layer_postprocess_dropout"],
        "padded_decode": params["padded_decode"],
        "decode_max_length": params["decode_max_length"],
        "dtype": params["dtype"],
        "extra_decode_length": params["extra_decode_length"],
        "beam_size": params["beam_size"],
        "alpha": params["alpha"],
        "encoder_layer": encoder,
        "decoder_layer": decoder,
        "name": "transformer_v2",
    }

    if not is_train:
        # Inference graph: batch size is fixed only when decoding with padding.
        batch_size = params["decode_batch_size"] if params[
            "padded_decode"] else None
        source_ids = tf.keras.layers.Input((None, ),
                                           batch_size=batch_size,
                                           dtype="int64",
                                           name="inputs")
        transformer = models.Seq2SeqTransformer(**seq2seq_config)
        decoded = transformer(dict(inputs=source_ids), training=is_train)
        return tf.keras.Model(source_ids,
                              [decoded["outputs"], decoded["scores"]])

    # Training graph: consumes both source and target token ids.
    source_ids = tf.keras.layers.Input((None, ), dtype="int64", name="inputs")
    target_ids = tf.keras.layers.Input((None, ),
                                       dtype="int64",
                                       name="targets")
    transformer = models.Seq2SeqTransformer(**seq2seq_config)
    logits = transformer(dict(inputs=source_ids, targets=target_ids),
                         training=is_train)
    if params["enable_metrics_in_training"]:
        logits = metrics.MetricLayer(params["vocab_size"])(
            [logits, target_ids])
    # Identity Lambda pins the output's name and float32 dtype (e.g. under
    # mixed precision).
    logits = tf.keras.layers.Lambda(lambda x: x,
                                    name="logits",
                                    dtype=tf.float32)(logits)
    model = tf.keras.Model([source_ids, target_ids], logits)
    # Loss is attached to the model rather than passed to compile().
    model.add_loss(
        metrics.transformer_loss(logits, target_ids,
                                 params["label_smoothing"],
                                 params["vocab_size"]))
    return model
# Example #2 (scraped-snippet separator; vote count below)
# 0
def create_model(params, is_train):
    """Creates transformer model."""
    with tf.name_scope("model"):
        if not is_train:
            # Inference: the model emits decoded token ids and beam scores.
            source_ids = tf.keras.layers.Input((None, ),
                                               dtype="int64",
                                               name="inputs")
            transformer = Transformer(params, name="transformer_v2")
            decoded = transformer([source_ids], training=is_train)
            return tf.keras.Model(source_ids,
                                  [decoded["outputs"], decoded["scores"]])

        # Training: both source and target sequences feed the model.
        source_ids = tf.keras.layers.Input((None, ),
                                           dtype="int64",
                                           name="inputs")
        target_ids = tf.keras.layers.Input((None, ),
                                           dtype="int64",
                                           name="targets")
        transformer = Transformer(params, name="transformer_v2")
        logits = transformer([source_ids, target_ids], training=is_train)
        if params["enable_metrics_in_training"]:
            logits = metrics.MetricLayer(params["vocab_size"])(
                [logits, target_ids])
        # Identity Lambda pins the output's name and float32 dtype.
        logits = tf.keras.layers.Lambda(lambda x: x,
                                        name="logits",
                                        dtype=tf.float32)(logits)
        model = tf.keras.Model([source_ids, target_ids], logits)
        # Loss is attached to the model rather than passed to compile().
        model.add_loss(
            metrics.transformer_loss(logits, target_ids,
                                     params["label_smoothing"],
                                     params["vocab_size"]))
        return model
# Example #3 (scraped-snippet separator; vote count below)
# 0
 def test_metric_layer(self):
     """MetricLayer should pass logits through with their shape unchanged."""
     vocab = 50
     logit_input = tf.keras.layers.Input((None, vocab),
                                         dtype="float32",
                                         name="logits")
     target_input = tf.keras.layers.Input((None, ),
                                          dtype="int64",
                                          name="targets")
     passed_through = metrics.MetricLayer(vocab)([logit_input, target_input])
     expected_shape = [None, None, vocab]
     self.assertEqual(passed_through.shape.as_list(), expected_shape)
# Example #4 (scraped-snippet separator; vote count below)
# 0
def continuous_eval(strategy,
                    params,
                    model_type,
                    eval_file_pattern=None,
                    batch_size=4,
                    eval_steps=None,
                    model_dir=None,
                    timeout=3000):
  """Continuously evaluate checkpoints on testing data.

  Watches `model_dir` via `tf.train.checkpoints_iterator` and, for each new
  checkpoint, runs (up to `eval_steps` batches of) the eval dataset through
  the model, accumulating BLEU / ROUGE-2 / ROUGE-L scores plus the metrics
  tracked by `metrics_v2.MetricLayer`, then writes all of them as scalar
  summaries under `<model_dir>/summaries/eval`.

  Args:
    strategy: `tf.distribute.Strategy` under which the model, metrics and
      per-replica eval steps run.
    params: Hyperparameter object; must expose `vocab_size` and
      `pad_token_id` attributes.
    model_type: Forwarded to `models.create_model`.
    eval_file_pattern: File pattern for the evaluation input data.
    batch_size: Global batch size for the eval dataset.
    eval_steps: Optional cap on the number of batches per checkpoint; `None`
      evaluates the full dataset.
    model_dir: Directory watched for checkpoints and used for summaries.
    timeout: Seconds `checkpoints_iterator` waits for a new checkpoint
      before the loop ends.

  Returns:
    Dict mapping metric name to the float result from the last evaluated
    checkpoint (empty if no checkpoint ever appeared).
  """
  test_dataset = input_pipeline.get_input_dataset(
      eval_file_pattern,
      batch_size=batch_size,
      params=params,
      is_training=False,
      strategy=strategy)

  # Create model/metric variables inside the strategy scope so they are
  # placed and aggregated correctly across replicas.
  with strategy.scope():
    model = models.create_model(model_type, params)
    metric_layer = metrics_v2.MetricLayer(params.vocab_size)
    eval_summary_writer = tf.summary.create_file_writer(
        os.path.join(model_dir, "summaries/eval"))
    # Restored from each checkpoint below; used as the summary step.
    global_step = tf.Variable(
        0,
        trainable=False,
        dtype=tf.int64,
        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA,
        shape=[])

  @tf.function
  def test_step(inputs):
    """Calculates evaluation metrics on distributed devices."""

    def _test_step_fn(inputs):
      """Replicated accuracy calculation."""
      # Strip the start-of-sequence token so targets align with predictions.
      targets = models.remove_sos_from_seq(inputs["target_ids"],
                                           params.pad_token_id)

      # Using ground truth sequences as targets to calculate logits for accuracy
      # and perplexity metrics.
      # NOTE(review): assumes mode="train" returns a (logits, ..., ...)
      # triple while mode="eval" returns decoded outputs directly — confirm
      # against the models implementation.
      logits, _, _ = model(inputs, training=False, mode="train")
      metric_layer([logits, targets])

      # Get logits from top beam search results for bleu and rouge metrics.
      logits = model(inputs, training=False, mode="eval")

      return targets, logits

    outputs = strategy.run(_test_step_fn, args=(inputs,))

    # Unwrap per-replica values into local tensors for host-side scoring.
    return tf.nest.map_structure(strategy.experimental_local_results, outputs)

  # Pairs of (Keras mean accumulator, scoring function run on numpy arrays).
  metrics_and_funcs = [
      (tf.keras.metrics.Mean("bleu", dtype=tf.float32), bleu_score),
      (tf.keras.metrics.Mean("rouge_2_fscore",
                             dtype=tf.float32), rouge_2_fscore),
      (tf.keras.metrics.Mean("rouge_l_fscore",
                             dtype=tf.float32), rouge_l_fscore),
  ]
  eval_results = {}
  # Blocks until a new checkpoint appears or `timeout` seconds elapse.
  for latest_checkpoint in tf.train.checkpoints_iterator(
      model_dir, timeout=timeout):
    checkpoint = tf.train.Checkpoint(model=model, global_step=global_step)
    # expect_partial(): the checkpoint may contain extra objects (e.g.
    # optimizer slots) that eval does not restore.
    checkpoint.restore(latest_checkpoint).expect_partial()
    logging.info("Loaded checkpoint %s", latest_checkpoint)

    for i, inputs in enumerate(test_dataset):
      if eval_steps and i >= eval_steps:
        break
      outputs = test_step(inputs)
      for metric, func in metrics_and_funcs:
        # outputs[0]/outputs[1] are per-replica lists of targets/logits.
        for targets, logits in zip(outputs[0], outputs[1]):
          metric.update_state(func(logits.numpy(), targets.numpy()))

    # Publish all accumulated metrics for this checkpoint's global step.
    with eval_summary_writer.as_default():
      step = global_step.numpy()
      for metric, _ in metrics_and_funcs:
        eval_results[metric.name] = metric.result().numpy().astype(float)
        tf.summary.scalar(
            metric.name,
            eval_results[metric.name],
            step=step)
      for metric in metric_layer.metrics:
        eval_results[metric.name] = metric.result().numpy().astype(float)
        tf.summary.scalar(
            metric.name,
            eval_results[metric.name],
            step=step)
      logging.info("Step %d Metrics= %s", step, str(eval_results))
      eval_summary_writer.flush()

    # Resets metrics.
    for metric, _ in metrics_and_funcs:
      metric.reset_states()
    for metric in metric_layer.metrics:
      metric.reset_states()
  return eval_results