def run_bert(strategy,
             input_meta_data,
             model_config,
             train_input_fn=None,
             eval_input_fn=None,
             init_checkpoint=None,
             custom_callbacks=None):
  """Run BERT training."""
  # Enables XLA in Session Config. Should not be set for TPU.
  keras_utils.set_session_config(FLAGS.enable_xla)
  performance.set_mixed_precision_policy(common_flags.dtype())

  epochs = FLAGS.num_train_epochs * FLAGS.num_eval_per_epoch
  train_data_size = (
      input_meta_data['train_data_size'] // FLAGS.num_eval_per_epoch)
  steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
  warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
  eval_steps = int(
      math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
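  # Worked example (hypothetical numbers, not flag defaults): with a
  # train_data_size of 100000 examples, num_eval_per_epoch=1,
  # num_train_epochs=3 and train_batch_size=32, this gives
  # steps_per_epoch = int(100000 / 32) = 3125 and
  # warmup_steps = int(3 * 100000 * 0.1 / 32) = 937, i.e. warmup covers
  # roughly 10% of the 3 * 3125 = 9375 total training steps.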

  if not strategy:
    raise ValueError('Distribution strategy has not been specified.')

  if not custom_callbacks:
    custom_callbacks = []

  if FLAGS.log_steps:
    custom_callbacks.append(
        keras_utils.TimeHistory(
            batch_size=FLAGS.train_batch_size,
            log_steps=FLAGS.log_steps,
            logdir=FLAGS.model_dir))

  trained_model, _ = run_bert_classifier(
      strategy,
      model_config,
      input_meta_data,
      FLAGS.model_dir,
      epochs,
      steps_per_epoch,
      FLAGS.steps_per_loop,
      eval_steps,
      warmup_steps,
      FLAGS.learning_rate,
      init_checkpoint or FLAGS.init_checkpoint,
      train_input_fn,
      eval_input_fn,
      custom_callbacks=custom_callbacks)

  if FLAGS.model_export_path:
    model_saving_utils.export_bert_model(
        FLAGS.model_export_path, model=trained_model)
  return trained_model
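
# A minimal calling sketch (not part of the snippet above; the metadata-file
# layout and the helper name are assumptions): build a distribution strategy,
# load the JSON metadata written by data preprocessing, and hand both to
# run_bert together with the model config and dataset functions.
def run_bert_sketch(input_meta_data_path, model_config,
                    train_input_fn=None, eval_input_fn=None):
  import json
  import tensorflow as tf

  strategy = tf.distribute.MirroredStrategy()
  with tf.io.gfile.GFile(input_meta_data_path, 'rb') as reader:
    input_meta_data = json.loads(reader.read().decode('utf-8'))
  return run_bert(strategy, input_meta_data, model_config,
                  train_input_fn=train_input_fn, eval_input_fn=eval_input_fn)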
Example #2
def run_bert_pretrain(strategy, custom_callbacks=None):
    """Runs BERT pre-training."""

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
    if not strategy:
        raise ValueError('Distribution strategy is not specified.')

    # Runs customized training loop.
    logging.info(
        'Training using customized training loop TF 2.0 with distributed '
        'strategy.')

    performance.set_mixed_precision_policy(common_flags.dtype(),
                                           use_experimental_api=False)

    # post_allreduce_callbacks and allreduce_bytes_per_pack take effect only
    # when explicit_allreduce = True. In that case optimizer.apply_gradients()
    # no longer implicitly allreduces gradients; the training loop allreduces
    # them manually and passes the allreduced grads_and_vars to
    # apply_gradients(). With explicit_allreduce = True, clip_by_global_norm is
    # moved to after the allreduce. A sketch of a callback with the expected
    # shape appears after this function.
    return run_customized_training(
        strategy,
        bert_config,
        FLAGS.init_checkpoint,  # Used to initialize only the BERT submodel.
        FLAGS.max_seq_length,
        FLAGS.max_predictions_per_seq,
        FLAGS.model_dir,
        FLAGS.num_steps_per_epoch,
        FLAGS.steps_per_loop,
        FLAGS.num_train_epochs,
        FLAGS.learning_rate,
        FLAGS.warmup_steps,
        FLAGS.end_lr,
        FLAGS.optimizer_type,
        FLAGS.input_files,
        FLAGS.train_batch_size,
        FLAGS.use_next_sentence_label,
        FLAGS.train_summary_interval,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=FLAGS.explicit_allreduce,
        pre_allreduce_callbacks=[
            model_training_utils.clip_by_global_norm_callback
        ],
        allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)
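
# For reference: the explicit-allreduce path above expects gradient callbacks
# that take and return an iterable of (gradient, variable) pairs. This sketch
# mirrors the inline clip_by_global_norm_callback defined in a later example
# below; the real helper lives in model_training_utils.
def clip_by_global_norm_callback_sketch(grads_and_vars):
    """Clips gradients to a global norm of 1.0 before they are allreduced."""
    import tensorflow as tf
    grads, variables = zip(*grads_and_vars)
    clipped_grads, _ = tf.clip_by_global_norm(list(grads), clip_norm=1.0)
    return zip(clipped_grads, variables)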
Example #3
def run_electra_pretrain(strategy):
    """Runs Electra pre-training."""

    electra_config = electraconfigs.ElectraConfig.from_json_file(
        FLAGS.electra_config_file)
    if not strategy:
        raise ValueError('Distribution strategy is not specified.')

    # Runs customized training loop.
    logging.info(
        'Training using customized training loop TF 2.0 with distributed '
        'strategy.')

    performance.set_mixed_precision_policy(common_flags.dtype())
    return run_customized_training(
        strategy, electra_config, FLAGS.max_seq_length,
        FLAGS.max_predictions_per_seq, FLAGS.model_dir,
        FLAGS.num_steps_per_epoch, FLAGS.steps_per_loop,
        FLAGS.num_train_epochs, FLAGS.learning_rate, FLAGS.warmup_steps,
        FLAGS.input_files, FLAGS.train_batch_size)
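
# performance.set_mixed_precision_policy above is not shown in these snippets.
# Assuming it wraps the standard Keras mixed-precision API, setting the policy
# by hand would look roughly like this sketch:
def set_mixed_precision_policy_sketch(dtype):
    """Sets the global Keras dtype policy for float16/bfloat16/float32."""
    import tensorflow as tf
    if dtype == 'float16':
        tf.keras.mixed_precision.set_global_policy('mixed_float16')
    elif dtype == 'bfloat16':
        tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
    else:
        tf.keras.mixed_precision.set_global_policy('float32')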
Example #4
def run_bert_pretrain(strategy, custom_callbacks=None):
    """Runs BERT pre-training."""

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)
    if not strategy:
        raise ValueError('Distribution strategy is not specified.')

    # Runs customized training loop.
    logging.info(
        'Training using customized training loop TF 2.0 with distributed '
        'strategy.')

    performance.set_mixed_precision_policy(common_flags.dtype())

    # If explicit_allreduce = True, apply_gradients() no longer implicitly
    # allreduces gradients; the training loop allreduces them manually and
    # passes the allreduced grads_and_vars to apply_gradients().
    # clip_by_global_norm is kept before the allreduce, to be consistent with
    # the original TF1 model.
    return run_customized_training(
        strategy,
        bert_config,
        FLAGS.init_checkpoint,  # Used to initialize only the BERT submodel.
        FLAGS.max_seq_length,
        FLAGS.max_predictions_per_seq,
        FLAGS.model_dir,
        FLAGS.num_steps_per_epoch,
        FLAGS.steps_per_loop,
        FLAGS.num_train_epochs,
        FLAGS.learning_rate,
        FLAGS.warmup_steps,
        FLAGS.end_lr,
        FLAGS.optimizer_type,
        FLAGS.input_files,
        FLAGS.train_batch_size,
        FLAGS.use_next_sentence_label,
        FLAGS.train_summary_interval,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=FLAGS.explicit_allreduce,
        pre_allreduce_callbacks=[common_flags.clip_by_global_norm_callback])
Example #5
def train_squad(strategy,
                input_meta_data,
                bert_config,
                custom_callbacks=None,
                run_eagerly=False,
                init_checkpoint=None,
                sub_model_export_name=None):
    """Run bert squad training."""
    if strategy:
        logging.info(
            'Training using customized training loop with distribution'
            ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_session_config(FLAGS.enable_xla)
    performance.set_mixed_precision_policy(common_flags.dtype())

    epochs = FLAGS.num_train_epochs
    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
    warmup_steps = int(epochs * num_train_examples * 0.1 /
                       FLAGS.train_batch_size)
    train_input_fn = get_dataset_fn(FLAGS.train_data_path,
                                    max_seq_length,
                                    FLAGS.train_batch_size,
                                    is_training=True)

    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable)
        optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                                  steps_per_epoch * epochs,
                                                  warmup_steps, FLAGS.end_lr,
                                                  FLAGS.optimizer_type)

        squad_model.optimizer = performance.configure_optimizer(
            optimizer,
            use_float16=common_flags.use_float16(),
            use_graph_rewrite=common_flags.use_graph_rewrite())
        return squad_model, core_model

    # post_allreduce_callbacks and allreduce_bytes_per_pack take effect only
    # when explicit_allreduce = True. In that case optimizer.apply_gradients()
    # no longer implicitly allreduces gradients; the training loop allreduces
    # them manually and passes the allreduced grads_and_vars to
    # apply_gradients(). With explicit_allreduce = True, clip_by_global_norm is
    # moved to after the allreduce.
    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=_get_squad_model,
        loss_fn=get_loss_fn(),
        model_dir=FLAGS.model_dir,
        steps_per_epoch=steps_per_epoch,
        steps_per_loop=FLAGS.steps_per_loop,
        epochs=epochs,
        train_input_fn=train_input_fn,
        init_checkpoint=init_checkpoint or FLAGS.init_checkpoint,
        sub_model_export_name=sub_model_export_name,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=FLAGS.explicit_allreduce,
        pre_allreduce_callbacks=[
            model_training_utils.clip_by_global_norm_callback
        ],
        allreduce_bytes_per_pack=FLAGS.allreduce_bytes_per_pack)
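
# performance.configure_optimizer above is not shown in these snippets.
# Assuming it follows the usual Keras pattern, the float16 case amounts to
# wrapping the optimizer in a dynamic loss-scale optimizer (a sketch only; the
# graph-rewrite path is omitted here):
def configure_optimizer_sketch(optimizer, use_float16=False):
    """Wraps the optimizer with dynamic loss scaling when training in float16."""
    import tensorflow as tf
    if use_float16:
        optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)
    return optimizer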
Example #6
def train_squad(strategy,
                input_meta_data,
                bert_config,
                custom_callbacks=None,
                run_eagerly=False):
    """Run bert squad training."""
    if strategy:
        logging.info(
            'Training using customized training loop with distribution'
            ' strategy.')
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)
    performance.set_mixed_precision_policy(common_flags.dtype())

    epochs = FLAGS.num_train_epochs
    num_train_examples = input_meta_data['train_data_size']
    max_seq_length = input_meta_data['max_seq_length']
    steps_per_epoch = int(num_train_examples / FLAGS.train_batch_size)
    warmup_steps = int(epochs * num_train_examples * 0.1 /
                       FLAGS.train_batch_size)
    train_input_fn = get_dataset_fn(FLAGS.train_data_path,
                                    max_seq_length,
                                    FLAGS.train_batch_size,
                                    is_training=True)

    def _get_squad_model():
        """Get Squad model and optimizer."""
        squad_model, core_model = bert_models.squad_model(
            bert_config,
            max_seq_length,
            hub_module_url=FLAGS.hub_module_url,
            hub_module_trainable=FLAGS.hub_module_trainable)
        optimizer = optimization.create_optimizer(FLAGS.learning_rate,
                                                  steps_per_epoch * epochs,
                                                  warmup_steps)

        squad_model.optimizer = performance.configure_optimizer(
            optimizer,
            use_float16=common_flags.use_float16(),
            use_graph_rewrite=common_flags.use_graph_rewrite())
        return squad_model, core_model

    # If explicit_allreduce = True, apply_gradients() no longer implicitly
    # allreduces gradients; the training loop allreduces them manually and
    # passes the allreduced grads_and_vars to apply_gradients().
    # clip_by_global_norm will be applied to the allreduced gradients.
    def clip_by_global_norm_callback(grads_and_vars):
        grads, variables = zip(*grads_and_vars)
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
        return zip(clipped_grads, variables)

    model_training_utils.run_customized_training_loop(
        strategy=strategy,
        model_fn=_get_squad_model,
        loss_fn=get_loss_fn(),
        model_dir=FLAGS.model_dir,
        steps_per_epoch=steps_per_epoch,
        steps_per_loop=FLAGS.steps_per_loop,
        epochs=epochs,
        train_input_fn=train_input_fn,
        init_checkpoint=FLAGS.init_checkpoint,
        run_eagerly=run_eagerly,
        custom_callbacks=custom_callbacks,
        explicit_allreduce=False,
        post_allreduce_callbacks=[clip_by_global_norm_callback])
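
# get_dataset_fn is referenced above but not defined in these snippets. A
# minimal sketch of the expected shape, assuming TFRecord inputs; the feature
# names below (input_ids, input_mask, segment_ids, start_positions,
# end_positions) are assumptions about the preprocessing output:
def get_dataset_fn_sketch(input_file_pattern, max_seq_length,
                          global_batch_size, is_training):
    """Returns a callable that builds a (per-replica) tf.data.Dataset."""
    import tensorflow as tf

    name_to_features = {
        'input_ids': tf.io.FixedLenFeature([max_seq_length], tf.int64),
        'input_mask': tf.io.FixedLenFeature([max_seq_length], tf.int64),
        'segment_ids': tf.io.FixedLenFeature([max_seq_length], tf.int64),
    }
    if is_training:
        name_to_features['start_positions'] = tf.io.FixedLenFeature([], tf.int64)
        name_to_features['end_positions'] = tf.io.FixedLenFeature([], tf.int64)

    def _dataset_fn(ctx=None):
        batch_size = ctx.get_per_replica_batch_size(
            global_batch_size) if ctx else global_batch_size
        dataset = tf.data.TFRecordDataset(tf.io.gfile.glob(input_file_pattern))
        if is_training:
            dataset = dataset.shuffle(buffer_size=10000).repeat()
        dataset = dataset.map(
            lambda record: tf.io.parse_single_example(record, name_to_features),
            num_parallel_calls=tf.data.AUTOTUNE)
        return dataset.batch(batch_size, drop_remainder=True).prefetch(
            tf.data.AUTOTUNE)

    return _dataset_fn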
Example #7
def run_bert(strategy,
             input_meta_data,
             model_config,
             train_input_fn=None,
             eval_input_fn=None):
    """Run BERT training."""
    if FLAGS.mode == 'export_only':
        # Because the Keras ModelCheckpoint callback used with the Keras
        # compile/fit() API internally saves checkpoints via
        # model.save_weights(), the model must be restored with
        # model.load_weights() when Keras compile/fit() is used.
        export_classifier(FLAGS.model_export_path, input_meta_data,
                          FLAGS.use_keras_compile_fit, model_config,
                          FLAGS.model_dir)
        return

    if FLAGS.mode != 'train_and_eval':
        raise ValueError('Unsupported mode is specified: %s' % FLAGS.mode)
    # Enables XLA in Session Config. Should not be set for TPU.
    keras_utils.set_config_v2(FLAGS.enable_xla)
    performance.set_mixed_precision_policy(common_flags.dtype())

    epochs = FLAGS.num_train_epochs
    train_data_size = input_meta_data['train_data_size']
    steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size)
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if not strategy:
        raise ValueError('Distribution strategy has not been specified.')

    if FLAGS.log_steps:
        custom_callbacks = [
            keras_utils.TimeHistory(
                batch_size=FLAGS.train_batch_size,
                log_steps=FLAGS.log_steps,
                logdir=FLAGS.model_dir,
            )
        ]
    else:
        custom_callbacks = None

    trained_model = run_bert_classifier(
        strategy,
        model_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        run_eagerly=FLAGS.run_eagerly,
        use_keras_compile_fit=FLAGS.use_keras_compile_fit,
        custom_callbacks=custom_callbacks)

    if FLAGS.model_export_path:
        # Because the Keras ModelCheckpoint callback used with the Keras
        # compile/fit() API internally saves checkpoints via
        # model.save_weights(), the model must be restored with
        # model.load_weights() when Keras compile/fit() is used.
        model_saving_utils.export_bert_model(
            FLAGS.model_export_path,
            model=trained_model,
            restore_model_using_load_weights=FLAGS.use_keras_compile_fit)
    return trained_model
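
# model_saving_utils.export_bert_model above is not shown in these snippets.
# Assuming it follows the logic described in the comment, restoring the latest
# checkpoint and writing a SavedModel would look roughly like this sketch (the
# checkpoint-discovery step and the argument names are assumptions):
def export_bert_model_sketch(model_export_path, model, model_dir,
                             restore_model_using_load_weights=False):
    """Restores the latest checkpoint into `model` and exports a SavedModel."""
    import tensorflow as tf
    latest_checkpoint = tf.train.latest_checkpoint(model_dir)
    if restore_model_using_load_weights:
        # Keras compile/fit() checkpoints were written with save_weights(), so
        # they are read back with load_weights().
        model.load_weights(latest_checkpoint)
    else:
        # Custom-loop checkpoints were written via tf.train.Checkpoint.
        tf.train.Checkpoint(model=model).restore(
            latest_checkpoint).expect_partial()
    model.save(model_export_path, include_optimizer=False, save_format='tf')
    return model_export_path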