Exemplo n.º 1
0
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Runs `modeling.BertModel` followed by a `ClassifierLossLayer` head and
    wraps the result in the `TPUEstimatorSpec` matching `mode`.

    Args:
      features: Either the model input itself, or a dict holding the input
        under "input_ids" and, optionally, the labels under "labels".
      labels: The labels, or None when they are carried inside `features`.
      mode: A `tf.estimator.ModeKeys` value selecting train / eval / predict.
      params: Unused by this function.

    Returns:
      A `tf.compat.v1.estimator.tpu.TPUEstimatorSpec`:
        * TRAIN: loss, train op and a host call that logs the learning rate.
        * EVAL: loss plus P@1, R@1, F1@1, accuracy and mean-loss metrics.
        * otherwise: predictions under the key "log-probabilities".
    """

    if isinstance(features, dict):
      # Compare against None explicitly: `not labels` would try to evaluate a
      # tf.Tensor as a Python bool, which raises in graph mode and is
      # value-dependent in eager mode.
      if labels is None and "labels" in features:
        labels = features["labels"]
      features = features["input_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(bert_config)
    headl = ClassifierLossLayer(
        bert_config["num_labels"], bert_config["hidden_dropout_prob"],
        utils.create_initializer(bert_config["initializer_range"]),
        name=bert_config["scope"]+"/classifier")

    # Only the pooled output feeds the classification head.
    _, pooled_output = model(features, training=is_training)
    total_loss, log_probs = headl(pooled_output, labels, is_training)

    tvars = tf.compat.v1.trainable_variables()
    utils.log_variables(tvars, bert_config["ckpt_var_list"])

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      learning_rate = optimization.get_linear_warmup_linear_decay_lr(
          init_lr=bert_config["learning_rate"],
          num_train_steps=bert_config["num_train_steps"],
          num_warmup_steps=bert_config["num_warmup_steps"])

      optimizer = optimization.get_optimizer(bert_config, learning_rate)

      global_step = tf.compat.v1.train.get_or_create_global_step()

      gradients = optimizer.compute_gradients(total_loss, tvars)
      train_op = optimizer.apply_gradients(gradients, global_step=global_step)

      output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          host_call=utils.add_scalars_to_summary(
              bert_config["output_dir"], {"learning_rate": learning_rate}))

    elif mode == tf.estimator.ModeKeys.EVAL:

      def metric_fn(loss_value, label_ids, log_probs):
        """Builds the evaluation metric dict from loss, labels and scores."""
        loss = tf.compat.v1.metrics.mean(values=loss_value)

        predictions = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
        accuracy = tf.compat.v1.metrics.accuracy(
            labels=label_ids, predictions=predictions)
        p1, p1_op = tf.compat.v1.metrics.precision_at_k(
            labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)
        r1, r1_op = tf.compat.v1.metrics.recall_at_k(
            labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)
        # F1 is derived from the precision/recall value tensors, so it has no
        # state of its own to update.
        f11 = tf.math.divide_no_nan(2*p1*r1, p1+r1)

        metric_dict = {
            "P@1": (p1, p1_op),
            "R@1": (r1, r1_op),
            "f1@1": (f11, tf.no_op()),
            "classification_accuracy": accuracy,
            "classification_loss": loss,
        }

        return metric_dict

      # The loss gets a leading dimension before being handed to metric_fn.
      eval_metrics = (metric_fn,
                      [tf.expand_dims(total_loss, 0), labels, log_probs])
      output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          eval_metrics=eval_metrics)
    else:
      output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"log-probabilities": log_probs})

    return output_spec
Exemplo n.º 2
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Runs `modeling.BertModel`, attaches a `MaskedLMLayer` and an
        `NSPLayer` head, and returns the `TPUEstimatorSpec` for `mode`:
        a train op for TRAIN, accuracy / mean-loss metrics for EVAL, and the
        masked-LM log-probabilities plus sequence embeddings otherwise.
        The next-sentence loss only contributes to the total loss when
        `bert_config["use_nsp"]` is set.
        """

        mode_keys = tf.estimator.ModeKeys
        spec_cls = tf.compat.v1.estimator.tpu.TPUEstimatorSpec

        training = (mode == mode_keys.TRAIN)

        # Encoder and the two pretraining heads.
        encoder = modeling.BertModel(bert_config)
        mlm_head = MaskedLMLayer(
            bert_config["hidden_size"],
            bert_config["vocab_size"],
            encoder.embeder,
            initializer=utils.create_initializer(
                bert_config["initializer_range"]),
            activation_fn=utils.get_activation(bert_config["hidden_act"]))
        nsp_head = NSPLayer(
            bert_config["hidden_size"],
            initializer=utils.create_initializer(
                bert_config["initializer_range"]))

        sequence_output, pooled_output = encoder(
            features["input_ids"],
            training=training,
            token_type_ids=features.get("segment_ids"))

        mlm_loss, mlm_log_probs = mlm_head(
            sequence_output,
            label_ids=features.get("masked_lm_ids"),
            label_weights=features.get("masked_lm_weights"),
            masked_lm_positions=features.get("masked_lm_positions"))

        nsp_loss, nsp_log_probs = nsp_head(
            pooled_output, features.get("next_sentence_labels"))

        if bert_config["use_nsp"]:
            total_loss = mlm_loss + nsp_loss
        else:
            total_loss = mlm_loss

        trainable = tf.compat.v1.trainable_variables()
        utils.log_variables(trainable, bert_config["ckpt_var_list"])

        # ---- TRAIN ---------------------------------------------------------
        if mode == mode_keys.TRAIN:
            lr = optimization.get_linear_warmup_linear_decay_lr(
                init_lr=bert_config["learning_rate"],
                num_train_steps=bert_config["num_train_steps"],
                num_warmup_steps=bert_config["num_warmup_steps"])
            optimizer = optimization.get_optimizer(bert_config, lr)
            step = tf.compat.v1.train.get_global_step()

            grads_and_vars = optimizer.compute_gradients(total_loss, trainable)
            train_op = optimizer.apply_gradients(
                grads_and_vars, global_step=step)

            summary_host_call = utils.add_scalars_to_summary(
                bert_config["output_dir"], {"learning_rate": lr})
            return spec_cls(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=summary_host_call)

        # ---- EVAL ----------------------------------------------------------
        if mode == mode_keys.EVAL:

            def metric_fn(masked_lm_loss_value, masked_lm_log_probs,
                          masked_lm_ids, masked_lm_weights,
                          next_sentence_loss_value, next_sentence_log_probs,
                          next_sentence_labels):
                """Returns accuracy and mean loss for both pretraining heads."""
                metrics = {}

                mlm_pred = tf.argmax(
                    masked_lm_log_probs, axis=-1, output_type=tf.int32)
                metrics["masked_lm_accuracy"] = tf.compat.v1.metrics.accuracy(
                    labels=masked_lm_ids,
                    predictions=mlm_pred,
                    weights=masked_lm_weights)
                metrics["masked_lm_loss"] = tf.compat.v1.metrics.mean(
                    values=masked_lm_loss_value)

                nsp_pred = tf.argmax(
                    next_sentence_log_probs, axis=-1, output_type=tf.int32)
                metrics["next_sentence_accuracy"] = (
                    tf.compat.v1.metrics.accuracy(
                        labels=next_sentence_labels,
                        predictions=nsp_pred))
                metrics["next_sentence_loss"] = tf.compat.v1.metrics.mean(
                    values=next_sentence_loss_value)

                return metrics

            # Positional inputs for metric_fn, in its parameter order.
            metric_inputs = [
                mlm_loss,
                mlm_log_probs,
                features["masked_lm_ids"],
                features["masked_lm_weights"],
                nsp_loss,
                nsp_log_probs,
                features["next_sentence_labels"],
            ]
            return spec_cls(
                mode=mode,
                loss=total_loss,
                eval_metrics=(metric_fn, metric_inputs))

        # ---- PREDICT (any other mode) --------------------------------------
        predictions = {
            "log-probabilities": mlm_log_probs,
            "seq-embeddings": sequence_output,
        }
        return spec_cls(mode=mode, predictions=predictions)
Exemplo n.º 3
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Runs `modeling.TransformerModel`, computes a padded cross-entropy
        loss against the targets, and returns the `TPUEstimatorSpec` matching
        `mode`.

        Args:
            features: Either the model input itself, or a dict holding the
                input under "input_ids" and, optionally, the targets under
                "target_ids".
            labels: Target ids, or None when they are carried in `features`.
            mode: A `tf.estimator.ModeKeys` value.
            params: Unused by this function.

        Returns:
            A `tf.compat.v1.estimator.tpu.TPUEstimatorSpec`:
              * TRAIN: loss, train op and a host call logging the learning
                rate.
              * EVAL: mean loss and log-likelihood metrics, plus approximate
                ROUGE-1/2/L when `transformer_config["use_tpu"]` is false.
              * otherwise: predictions with "pred_ids" and, when not on TPU,
                the detokenized "pred_sent".
        """

        def _load_tokenizer():
            """Builds the SentencePiece tokenizer from the configured file."""
            # Use a context manager so the model file handle is closed as
            # soon as its contents have been read.
            with tf.io.gfile.GFile(transformer_config["vocab_model_file"],
                                   "rb") as model_file:
                serialized_model = model_file.read()
            return tft.SentencepieceTokenizer(model=serialized_model)

        def _detokenize(tokenizer, ids):
            """Converts token ids to text with the shared post-processing."""
            text = tokenizer.detokenize(ids)
            # Add a space before special tokens.
            text = tf.strings.regex_replace(text, r"([<\[]\S+[>\]])",
                                            b" \\1")
            if transformer_config["substitute_newline"]:
                text = tf.strings.regex_replace(
                    text, transformer_config["substitute_newline"], "\n")
            return text

        if isinstance(features, dict):
            # Compare against None explicitly: `not labels` would try to
            # evaluate a tf.Tensor as a Python bool, which raises in graph
            # mode and is value-dependent in eager mode.
            if labels is None and "target_ids" in features:
                labels = features["target_ids"]
            features = features["input_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.TransformerModel(transformer_config)
        (llh, logits, pred_ids), _ = model(features,
                                           target_ids=labels,
                                           training=is_training)

        total_loss = padded_cross_entropy_loss(
            logits, labels, transformer_config["label_smoothing"],
            transformer_config["vocab_size"])

        tvars = tf.compat.v1.trainable_variables()
        utils.log_variables(tvars, transformer_config["ckpt_var_list"])

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            learning_rate = optimization.get_linear_warmup_rsqrt_decay_lr(
                init_lr=transformer_config["learning_rate"],
                hidden_size=transformer_config["hidden_size"],
                num_warmup_steps=transformer_config["num_warmup_steps"])

            optimizer = optimization.get_optimizer(transformer_config,
                                                   learning_rate)

            global_step = tf.compat.v1.train.get_global_step()

            # NOTE(review): freezing the position embedding is gated on the
            # "use_bias" flag, which looks surprising -- confirm the intent.
            if not transformer_config["use_bias"]:
                logging.info("Fixing position embedding, i.e. not trainable.")
                posemb = "pegasus/embeddings/position_embeddings"
                # Drop the position-embedding variable from the update list.
                tvars = [v for v in tvars if v.name.split(":")[0] != posemb]

            gradients = optimizer.compute_gradients(total_loss, tvars)
            train_op = optimizer.apply_gradients(gradients,
                                                 global_step=global_step)

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=utils.add_scalars_to_summary(
                    transformer_config["output_dir"],
                    {"learning_rate": learning_rate}))

        elif mode == tf.estimator.ModeKeys.EVAL:

            tokenizer = _load_tokenizer()

            def rouge_py_func(label_sent, pred_sent):
                """Approximate ROUGE scores, always run externally for final scores."""
                scorer = rouge_scorer.RougeScorer(
                    ["rouge1", "rouge2", "rougeLsum"], use_stemmer=True)
                r1, r2, rl = 0.0, 0.0, 0.0
                for ls, ps in zip(label_sent, pred_sent):
                    score = scorer.score(ls.decode("utf-8"),
                                         ps.decode("utf-8"))
                    r1 += score["rouge1"].fmeasure
                    r2 += score["rouge2"].fmeasure
                    rl += score["rougeLsum"].fmeasure
                # Average the per-example F-measures over the batch.
                return r1 / len(label_sent), r2 / len(label_sent), rl / len(
                    label_sent)

            def metric_fn(loss, log_probs, label_ids, pred_ids):
                """Builds the evaluation metric dict."""
                loss = tf.compat.v1.metrics.mean(values=loss)
                # Positions whose label id is 0 get zero weight.
                log_probs = tf.compat.v1.metrics.mean(
                    values=log_probs,
                    weights=tf.cast(tf.not_equal(label_ids, 0), tf.float32))
                metric_dict = {
                    "prediction_loss": loss,
                    "log_likelihood": log_probs,
                }

                if not transformer_config["use_tpu"]:
                    # Approximate ROUGE scores if not running on tpus.
                    # Always run externally for final scores.
                    label_sent = _detokenize(tokenizer, label_ids)
                    pred_sent = _detokenize(tokenizer, pred_ids)
                    rouge_value = tf.compat.v1.py_func(
                        func=rouge_py_func,
                        inp=[label_sent, pred_sent],
                        Tout=[tf.float64, tf.float64, tf.float64],
                        stateful=False)
                    rouge_value = tf.cast(rouge_value, tf.float32)
                    rouge1 = tf.compat.v1.metrics.mean(values=rouge_value[0])
                    rouge2 = tf.compat.v1.metrics.mean(values=rouge_value[1])
                    rougeL = tf.compat.v1.metrics.mean(values=rouge_value[2])  # pylint: disable=invalid-name

                    metric_dict.update({
                        "eval/Rouge-1": rouge1,
                        "eval/Rouge-2": rouge2,
                        "eval/Rouge-L": rougeL,
                    })
                return metric_dict

            eval_metrics = (metric_fn, [total_loss, llh, labels, pred_ids])
            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, eval_metrics=eval_metrics)
        else:

            prediction_dict = {"pred_ids": pred_ids}
            if not transformer_config["use_tpu"]:
                # Detokenized text is only produced when not running on TPU.
                tokenizer = _load_tokenizer()
                pred_sent = _detokenize(tokenizer, pred_ids)
                prediction_dict.update({"pred_sent": pred_sent})

            output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=prediction_dict)

        return output_spec