示例#1
0
 def model_fn(features, labels, mode, params):
   """Returns a TPUEstimatorSpec for pre-training (TRAIN or EVAL only)."""
   is_training = mode == tf.estimator.ModeKeys.TRAIN
   model = PretrainingModel(config, features, is_training)
   utils.log("Model is built!")

   def _eta_hook(n_steps, **kwargs):
     # Log the loss each loop unless running on TPU (nothing is fetched there).
     to_log = {} if config.use_tpu else dict(loss=model.total_loss)
     return training_utils.ETAHook(to_log, n_steps,
                                   config.iterations_per_loop,
                                   config.use_tpu, **kwargs)

   if is_training:
     train_op = optimization.create_optimizer(
         model.total_loss, config.learning_rate, config.num_train_steps,
         weight_decay_rate=config.weight_decay_rate,
         use_tpu=config.use_tpu,
         warmup_steps=config.num_warmup_steps,
         lr_decay_power=config.lr_decay_power)
     return tf.estimator.tpu.TPUEstimatorSpec(
         mode=mode,
         loss=model.total_loss,
         train_op=train_op,
         training_hooks=[_eta_hook(config.num_train_steps)])
   if mode == tf.estimator.ModeKeys.EVAL:
     return tf.estimator.tpu.TPUEstimatorSpec(
         mode=mode,
         loss=model.total_loss,
         eval_metrics=model.eval_metrics,
         evaluation_hooks=[_eta_hook(config.num_eval_steps,
                                     is_training=False)])
   raise ValueError("Only TRAIN and EVAL modes are supported")
示例#2
0
    def model_fn(features, labels, mode, params):
        """Build the TPUEstimatorSpec for pre-training (TRAIN or EVAL)."""
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        model = PretrainingModel(config, features, is_training)
        utils.log("Model is built!")
        if is_training:
            train_op, optimizer = optimization.create_optimizer(
                model.total_loss,
                config.learning_rate,
                config.num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                use_tpu=config.use_tpu,
                warmup_steps=config.num_warmup_steps,
                lr_decay_power=config.lr_decay_power)

            # Tensors periodically reported by the ETA hook; nothing can be
            # fetched for logging when running on TPU.
            if config.use_tpu:
                to_log = {}
            else:
                to_log = dict(
                    Total_loss=model.total_loss,
                    MLM_loss=model.mlm_output_loss,
                    RTD_loss=model.disc_output_loss,
                    learning_rate=optimizer.learning_rate,
                    MLM_accuracy=model.metrics['masked_lm_accuracy'],
                    Sampled_MLM_accuracy=model.metrics[
                        'sampled_masked_lm_accuracy'],
                    RTD_accuracy=model.metrics['disc_accuracy'],
                    RTD_precision=model.metrics['disc_precision'],
                    RTD_recall=model.metrics['disc_recall'],
                    RTD_auc=model.metrics['disc_auc'],
                )
            eta_hook = training_utils.ETAHook(to_log, config.num_train_steps,
                                              config.iterations_per_loop,
                                              config.use_tpu)

            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                train_op=train_op,
                training_hooks=[eta_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_log = ({} if config.use_tpu else dict(
                loss=model.total_loss,
                mlm_loss=model.mlm_output_loss,
                disc_loss=model.disc_output_loss))
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                eval_metrics=model.eval_metrics,
                evaluation_hooks=[
                    training_utils.ETAHook(eval_log,
                                           config.num_eval_steps,
                                           config.iterations_per_loop,
                                           config.use_tpu,
                                           is_training=False)
                ])
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported")
        return output_spec
示例#3
0
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator (TRAIN or PREDICT)."""
        utils.log("Building model...")
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        model = FinetuningModel(config, tasks, is_training, features,
                                num_train_steps)

        # Restore pre-trained weights: a pre-training run's latest checkpoint,
        # if one is configured, takes precedence over config.init_checkpoint.
        init_checkpoint = config.init_checkpoint
        if pretraining_config is not None:
            init_checkpoint = tf.train.latest_checkpoint(
                pretraining_config.model_dir)
            utils.log("Using checkpoint", init_checkpoint)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if not config.use_tpu:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            else:
                # On the TPU path, defer the restore into the Scaffold.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold

        # Build the spec for training or prediction.
        if is_training:
            train_op = optimization.create_optimizer(
                model.loss,
                config.learning_rate,
                num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                use_tpu=config.use_tpu,
                warmup_proportion=config.warmup_proportion,
                layerwise_lr_decay_power=config.layerwise_lr_decay,
                n_transformer_layers=model.bert_config.num_hidden_layers)
            spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=model.loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                training_hooks=[
                    training_utils.ETAHook(
                        {} if config.use_tpu else dict(loss=model.loss),
                        num_train_steps, config.iterations_per_loop,
                        config.use_tpu, 10)
                ])
        else:
            assert mode == tf.estimator.ModeKeys.PREDICT
            spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=utils.flatten_dict(model.outputs),
                scaffold_fn=scaffold_fn)

        utils.log("Building complete")
        return spec
示例#4
0
    def model_fn(features, labels, mode, params):
        """The `model_fn` for an Estimator.

        Builds the finetuning model, warm-starts it from a checkpoint when
        one is available, and returns an EstimatorSpec for TRAIN or PREDICT.
        """
        utils.log("Building model...")
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = FinetuningModel(config, tasks, is_training, features,
                                num_train_steps)

        # Load pre-trained weights from checkpoint.  A pre-training run's
        # latest checkpoint, when configured, overrides config.init_checkpoint.
        init_checkpoint = config.init_checkpoint
        if pretraining_config is not None:
            init_checkpoint = tf.train.latest_checkpoint(
                pretraining_config.model_dir)
        initialized_variable_names = {}
        tvars = tf.trainable_variables()
        if init_checkpoint:
            # Logged once here (previously this was also logged in the
            # pretraining_config branch above, producing a duplicate line).
            utils.log("Using checkpoint", init_checkpoint)
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        utils.log("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            # BUG FIX: was utils.logerr — inconsistent with utils.log used
            # for this exact message elsewhere in this file; the variable
            # listing is informational, not an error.
            utils.log("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

        # Build model for training or prediction
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                model.loss,
                config.learning_rate,
                num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                warmup_proportion=config.warmup_proportion,
                n_transformer_layers=model.bert_config.num_hidden_layers)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.loss,
                train_op=train_op,
                training_hooks=[
                    training_utils.ETAHook(
                        {} if config.use_tpu else dict(loss=model.loss),
                        num_train_steps, config.iterations_per_loop,
                        config.use_tpu, 10)
                ])
        else:
            assert mode == tf.estimator.ModeKeys.PREDICT
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode, predictions=utils.flatten_dict(model.outputs))

        utils.log("Building complete")
        return output_spec
示例#5
0
    def model_fn(features, labels, mode, params):
        """Build the model for training.

        Selects one of three pre-training model variants based on
        config.masking_strategy, optionally warm-starts from
        config.init_checkpoint, and returns a tf.estimator.EstimatorSpec
        for TRAIN or EVAL mode (any other mode raises ValueError).
        """
        # Pick the model class according to the configured masking strategy.
        if config.masking_strategy == pretrain_helpers.ADVERSARIAL_STRATEGY or config.masking_strategy == pretrain_helpers.MIX_ADV_STRATEGY:
            model = AdversarialPretrainingModel(
                config, features, mode == tf.estimator.ModeKeys.TRAIN)
        elif config.masking_strategy == pretrain_helpers.RW_STRATEGY:
            # Ratio-based masking: read one float per non-empty line from the
            # ratio file, taken from the second whitespace-separated column.
            # NOTE(review): column 0 is presumably a token/key — confirm the
            # file format against whatever produces config.ratio_file.
            ratio = []
            with open(config.ratio_file, "r") as fin:
                for line in fin:
                    line = line.strip()
                    if line:
                        tok = line.split()
                        ratio.append(float(tok[1]))
            model = RatioBasedPretrainingModel(
                config, features, ratio, mode == tf.estimator.ModeKeys.TRAIN)
        else:
            model = PretrainingModel(config, features,
                                     mode == tf.estimator.ModeKeys.TRAIN)
        utils.log("Model is built!")

        tvars = tf.trainable_variables()

        # Optionally warm-start matching variables from a checkpoint; the
        # names that were restored are remembered so they can be flagged in
        # the variable listing below.
        initialized_variable_names = {}
        if config.init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, config.init_checkpoint)
            tf.train.init_from_checkpoint(config.init_checkpoint,
                                          assignment_map)

        # Log every trainable variable, marking those restored from the
        # checkpoint.
        utils.log("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            utils.log("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            if config.masking_strategy == pretrain_helpers.ADVERSARIAL_STRATEGY:
                # Adversarial training steps two optimizers jointly (via
                # tf.group): one on the student MLM loss, one on the teacher
                # loss with its own learning rate.
                student_train_op = optimization.create_optimizer(
                    model.mlm_loss,
                    config.learning_rate,
                    config.num_train_steps,
                    weight_decay_rate=config.weight_decay_rate,
                    use_tpu=config.use_tpu,
                    warmup_steps=config.num_warmup_steps,
                    lr_decay_power=config.lr_decay_power)
                teacher_train_op = optimization.create_optimizer(
                    model.teacher_loss,
                    config.teacher_learning_rate,
                    config.num_train_steps,
                    lr_decay_power=config.lr_decay_power)
                train_op = tf.group(student_train_op, teacher_train_op)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=model.total_loss,
                    train_op=train_op,
                    training_hooks=[
                        training_utils.ETAHook(
                            dict(loss=model.mlm_loss,
                                 teacher_loss=model.teacher_loss,
                                 reward=model._baseline),
                            config.num_train_steps, config.iterations_per_loop,
                            config.use_tpu)
                    ])
            else:
                # All other strategies train a single optimizer on the total
                # loss.
                train_op = optimization.create_optimizer(
                    model.total_loss,
                    config.learning_rate,
                    config.num_train_steps,
                    weight_decay_rate=config.weight_decay_rate,
                    use_tpu=config.use_tpu,
                    warmup_steps=config.num_warmup_steps,
                    lr_decay_power=config.lr_decay_power)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=model.total_loss,
                    train_op=train_op,
                    training_hooks=[
                        training_utils.ETAHook(dict(loss=model.total_loss),
                                               config.num_train_steps,
                                               config.iterations_per_loop,
                                               config.use_tpu)
                    ])
        elif mode == tf.estimator.ModeKeys.EVAL:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                eval_metric_ops=model.eval_metrics,
                evaluation_hooks=[
                    training_utils.ETAHook(dict(loss=model.total_loss),
                                           config.num_eval_steps,
                                           config.iterations_per_loop,
                                           config.use_tpu,
                                           is_training=False)
                ])
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported")
        return output_spec
示例#6
0
    def model_fn(features, labels, mode, params):
        """Build the model for training.

        Returns a tf.estimator.EstimatorSpec for TRAIN or EVAL mode; any
        other mode raises ValueError.
        """
        model = PretrainingModel(
            config=config,
            features=features,
            is_training=mode == tf.estimator.ModeKeys.TRAIN,
            init_checkpoint=config.init_checkpoint)
        utils.log("Model is built!")
        # Scalars periodically reported by the ETA hook in both modes.
        to_log = {
            "gen_loss": model.mlm_output.loss,
            "disc_loss": model.disc_output.loss,
            "total_loss": model.total_loss
        }
        if mode == tf.estimator.ModeKeys.TRAIN:

            tf.summary.scalar('gen_loss', model.mlm_output.loss)
            tf.summary.scalar('disc_loss', model.disc_output.loss)
            tf.summary.scalar('total_loss', model.total_loss)

            # Scale the learning rate linearly with the number of Horovod
            # workers when configured.
            lr_multiplier = hvd.size() if config.scale_lr else 1
            train_op = optimization.create_optimizer(
                loss=model.total_loss,
                learning_rate=config.learning_rate * lr_multiplier,
                num_train_steps=config.num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                warmup_steps=config.num_warmup_steps,
                warmup_proportion=0,
                lr_decay_power=config.lr_decay_power,
                layerwise_lr_decay_power=-1,
                n_transformer_layers=None,
                hvd=hvd,
                use_fp16=config.use_fp16,
                num_accumulation_steps=config.num_accumulation_steps,
                allreduce_post_accumulation=config.allreduce_post_accumulation)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                train_op=train_op,
                training_hooks=[
                    training_utils.ETAHook(
                        to_log=to_log,
                        n_steps=config.num_train_steps,
                        iterations_per_loop=config.iterations_per_loop,
                        on_tpu=False,
                        log_every=1,
                        is_training=True)
                ])
        elif mode == tf.estimator.ModeKeys.EVAL:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                # BUG FIX: tf.estimator.EstimatorSpec has no `eval_metrics`
                # argument (that keyword belongs to TPUEstimatorSpec), so
                # EVAL mode raised TypeError.  EstimatorSpec takes
                # `eval_metric_ops` — assumes model.eval_metrics is a dict of
                # metric ops, as it is where EstimatorSpec is used elsewhere
                # in this file.
                eval_metric_ops=model.eval_metrics,
                evaluation_hooks=[
                    training_utils.ETAHook(
                        to_log=to_log,
                        n_steps=config.num_eval_steps,
                        iterations_per_loop=config.iterations_per_loop,
                        on_tpu=False,
                        log_every=1,
                        is_training=False)
                ])
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported")
        return output_spec
示例#7
0
  def model_fn(features, labels, mode, params):
    """Build the model for training.

    Returns a TPUEstimatorSpec for TRAIN or EVAL mode; any other mode
    raises ValueError.
    """
    model = PretrainingModel(config, features,
                             mode == tf.estimator.ModeKeys.TRAIN)
    utils.log("Model is built!")

    # Load pre-trained weights from checkpoint.  config.init_checkpoint is
    # treated as a directory; the newest checkpoint in it is used.
    # NOTE(review): latest_checkpoint is called unconditionally — confirm
    # config.init_checkpoint is always set for this code path.
    init_checkpoint = tf.train.latest_checkpoint(config.init_checkpoint)
    utils.log("Using checkpoint", init_checkpoint)
    # Fetch the trainable variables once (previously fetched twice).
    tvars = tf.trainable_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
       ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if config.use_tpu:
        # On the TPU path, defer the restore into the Scaffold.
        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()
        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Log every trainable variable, marking those restored from checkpoint.
    utils.log("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      utils.log("  name = %s, shape = %s%s", var.name, var.shape,
                init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
      train_op = optimization.create_optimizer(
          model.total_loss, config.learning_rate, config.num_train_steps,
          weight_decay_rate=config.weight_decay_rate,
          use_tpu=config.use_tpu,
          warmup_steps=config.num_warmup_steps,
          lr_decay_power=config.lr_decay_power
      )
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=model.total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn,
          training_hooks=[training_utils.ETAHook(
              {} if config.use_tpu else dict(loss=model.total_loss),
              config.num_train_steps, config.iterations_per_loop,
              config.use_tpu)]
      )
    elif mode == tf.estimator.ModeKeys.EVAL:
      output_spec = tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=model.total_loss,
          scaffold_fn=scaffold_fn,
          eval_metrics=model.eval_metrics,
          evaluation_hooks=[training_utils.ETAHook(
              {} if config.use_tpu else dict(loss=model.total_loss),
              config.num_eval_steps, config.iterations_per_loop,
              config.use_tpu, is_training=False)])
    else:
      raise ValueError("Only TRAIN and EVAL modes are supported")
    return output_spec