def model_fn(features, labels, mode, params):
    """Build the pretraining model and return its TPUEstimatorSpec.

    `labels` and `params` are unused; hyperparameters come from the
    enclosing `config`. Only TRAIN and EVAL modes are supported.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    model = PretrainingModel(config, features, is_training)
    utils.log("Model is built!")

    # The ETA hook gets no tensors to fetch when running on TPU.
    hook_metrics = {} if config.use_tpu else dict(loss=model.total_loss)

    if is_training:
        train_op = optimization.create_optimizer(
            model.total_loss,
            config.learning_rate,
            config.num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            use_tpu=config.use_tpu,
            warmup_steps=config.num_warmup_steps,
            lr_decay_power=config.lr_decay_power,
        )
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            train_op=train_op,
            training_hooks=[
                training_utils.ETAHook(
                    hook_metrics,
                    config.num_train_steps,
                    config.iterations_per_loop,
                    config.use_tpu,
                )
            ],
        )
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            eval_metrics=model.eval_metrics,
            evaluation_hooks=[
                training_utils.ETAHook(
                    hook_metrics,
                    config.num_eval_steps,
                    config.iterations_per_loop,
                    config.use_tpu,
                    is_training=False,
                )
            ],
        )
    raise ValueError("Only TRAIN and EVAL modes are supported")
def model_fn(features, labels, mode, params):
    """Build the pretraining model for TRAIN or EVAL.

    Hyperparameters come from the enclosing `config`; `labels` and
    `params` are unused. Returns a TPUEstimatorSpec.
    """
    model = PretrainingModel(config, features,
                             mode == tf.estimator.ModeKeys.TRAIN)
    utils.log("Model is built!")
    if mode == tf.estimator.ModeKeys.TRAIN:
        # This create_optimizer variant returns the optimizer too, so the
        # current learning rate can be included in the logging hook below.
        train_op, optimizer = optimization.create_optimizer(
            model.total_loss, config.learning_rate, config.num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            use_tpu=config.use_tpu,
            warmup_steps=config.num_warmup_steps,
            lr_decay_power=config.lr_decay_power)
        # The hook is given no tensors to fetch when running on TPU;
        # otherwise it logs losses, the LR, and MLM/RTD quality metrics.
        eta_hook = training_utils.ETAHook({} if config.use_tpu else dict(
            Total_loss=model.total_loss,
            MLM_loss=model.mlm_output_loss,
            RTD_loss=model.disc_output_loss,
            learning_rate=optimizer.learning_rate,
            MLM_accuracy=model.metrics['masked_lm_accuracy'],
            Sampled_MLM_accuracy=model.metrics['sampled_masked_lm_accuracy'],
            RTD_accuracy=model.metrics['disc_accuracy'],
            RTD_precision=model.metrics['disc_precision'],
            RTD_recall=model.metrics['disc_recall'],
            RTD_auc=model.metrics['disc_auc'],
        ), config.num_train_steps, config.iterations_per_loop,
            config.use_tpu)
        output_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            train_op=train_op,
            training_hooks=[eta_hook])
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            eval_metrics=model.eval_metrics,
            evaluation_hooks=[
                training_utils.ETAHook({} if config.use_tpu else dict(
                    loss=model.total_loss,
                    mlm_loss=model.mlm_output_loss,
                    disc_loss=model.disc_output_loss),
                    config.num_eval_steps, config.iterations_per_loop,
                    config.use_tpu, is_training=False)
            ])
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported")
    return output_spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator: build, warm-start, return the spec."""
    utils.log("Building model...")
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    model = FinetuningModel(
        config, tasks, is_training, features, num_train_steps)

    # Load pre-trained weights: the explicitly configured checkpoint by
    # default, or the newest one in the pretraining model directory.
    init_checkpoint = config.init_checkpoint
    if pretraining_config is not None:
        init_checkpoint = tf.train.latest_checkpoint(
            pretraining_config.model_dir)
        utils.log("Using checkpoint", init_checkpoint)

    trainable_vars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        if config.use_tpu:
            # On TPU the restore has to happen inside the Scaffold factory.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Build the spec for training or prediction.
    if is_training:
        train_op = optimization.create_optimizer(
            model.loss, config.learning_rate, num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            use_tpu=config.use_tpu,
            warmup_proportion=config.warmup_proportion,
            layerwise_lr_decay_power=config.layerwise_lr_decay,
            n_transformer_layers=model.bert_config.num_hidden_layers)
        eta_hook = training_utils.ETAHook(
            {} if config.use_tpu else dict(loss=model.loss),
            num_train_steps, config.iterations_per_loop, config.use_tpu, 10)
        spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn,
            training_hooks=[eta_hook])
    else:
        assert mode == tf.estimator.ModeKeys.PREDICT
        spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=utils.flatten_dict(model.outputs),
            scaffold_fn=scaffold_fn)
    utils.log("Building complete")
    return spec
def model_fn(features, labels, mode, params):
    """The `model_fn` for a (non-TPU) Estimator.

    Builds the finetuning model, restores pre-trained weights when a
    checkpoint is available, logs the trainable variables, and returns
    an EstimatorSpec for TRAIN or PREDICT mode.
    """
    utils.log("Building model...")
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    model = FinetuningModel(
        config, tasks, is_training, features, num_train_steps)

    # Prefer the most recent pretraining checkpoint when one is configured.
    init_checkpoint = config.init_checkpoint
    if pretraining_config is not None:
        init_checkpoint = tf.train.latest_checkpoint(
            pretraining_config.model_dir)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if init_checkpoint:
        # Log once, and only when a checkpoint is actually used (the
        # original logged "Using checkpoint" twice on this path).
        utils.log("Using checkpoint", init_checkpoint)
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    utils.log("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        # utils.log, not utils.logerr: matches the sibling model_fns that
        # emit this same message, and this is not an error condition.
        utils.log(" name = %s, shape = %s%s", var.name, var.shape,
                  init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            model.loss, config.learning_rate, num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            warmup_proportion=config.warmup_proportion,
            n_transformer_layers=model.bert_config.num_hidden_layers)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=model.loss,
            train_op=train_op,
            training_hooks=[
                training_utils.ETAHook(
                    {} if config.use_tpu else dict(loss=model.loss),
                    num_train_steps, config.iterations_per_loop,
                    config.use_tpu, 10)
            ])
    else:
        assert mode == tf.estimator.ModeKeys.PREDICT
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=utils.flatten_dict(model.outputs))
    utils.log("Building complete")
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the pretraining model for the configured masking strategy.

    Chooses between the adversarial, ratio-based, and plain pretraining
    models based on `config.masking_strategy`, optionally warm-starts from
    `config.init_checkpoint`, and returns an EstimatorSpec for TRAIN/EVAL.
    """
    if config.masking_strategy == pretrain_helpers.ADVERSARIAL_STRATEGY or config.masking_strategy == pretrain_helpers.MIX_ADV_STRATEGY:
        model = AdversarialPretrainingModel(
            config, features, mode == tf.estimator.ModeKeys.TRAIN)
    elif config.masking_strategy == pretrain_helpers.RW_STRATEGY:
        # Ratio file: whitespace-separated columns per line; the second
        # column is parsed as a float ratio, blank lines are skipped.
        ratio = []
        with open(config.ratio_file, "r") as fin:
            for line in fin:
                line = line.strip()
                if line:
                    tok = line.split()
                    ratio.append(float(tok[1]))
        model = RatioBasedPretrainingModel(
            config, features, ratio, mode == tf.estimator.ModeKeys.TRAIN)
    else:
        model = PretrainingModel(config, features,
                                 mode == tf.estimator.ModeKeys.TRAIN)
    utils.log("Model is built!")

    # Optionally restore weights from a checkpoint, then log every
    # trainable variable and whether it was initialized from it.
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    if config.init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
            tvars, config.init_checkpoint)
        tf.train.init_from_checkpoint(config.init_checkpoint, assignment_map)
    utils.log("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        utils.log(" name = %s, shape = %s%s", var.name, var.shape,
                  init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        if config.masking_strategy == pretrain_helpers.ADVERSARIAL_STRATEGY:
            # Two optimizers: one minimizes the student's MLM loss, one the
            # teacher's loss at its own learning rate; both update ops are
            # run together via tf.group.
            student_train_op = optimization.create_optimizer(
                model.mlm_loss, config.learning_rate, config.num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                use_tpu=config.use_tpu,
                warmup_steps=config.num_warmup_steps,
                lr_decay_power=config.lr_decay_power)
            teacher_train_op = optimization.create_optimizer(
                model.teacher_loss, config.teacher_learning_rate,
                config.num_train_steps,
                lr_decay_power=config.lr_decay_power)
            train_op = tf.group(student_train_op, teacher_train_op)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                train_op=train_op,
                training_hooks=[
                    training_utils.ETAHook(
                        # NOTE(review): model._baseline is a private
                        # attribute; it appears to be a reward baseline —
                        # confirm against AdversarialPretrainingModel.
                        dict(loss=model.mlm_loss,
                             teacher_loss=model.teacher_loss,
                             reward=model._baseline),
                        config.num_train_steps, config.iterations_per_loop,
                        config.use_tpu)
                ])
        else:
            train_op = optimization.create_optimizer(
                model.total_loss, config.learning_rate,
                config.num_train_steps,
                weight_decay_rate=config.weight_decay_rate,
                use_tpu=config.use_tpu,
                warmup_steps=config.num_warmup_steps,
                lr_decay_power=config.lr_decay_power)
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=model.total_loss,
                train_op=train_op,
                training_hooks=[
                    training_utils.ETAHook(dict(loss=model.total_loss),
                                           config.num_train_steps,
                                           config.iterations_per_loop,
                                           config.use_tpu)
                ])
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            eval_metric_ops=model.eval_metrics,
            evaluation_hooks=[
                training_utils.ETAHook(dict(loss=model.total_loss),
                                       config.num_eval_steps,
                                       config.iterations_per_loop,
                                       config.use_tpu,
                                       is_training=False)
            ])
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported")
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the pretraining model (GPU/Horovod path).

    Constructs the generator+discriminator pretraining model, records
    scalar summaries, attaches an ETA/logging hook, and returns a
    tf.estimator.EstimatorSpec. Only TRAIN and EVAL are supported.
    """
    model = PretrainingModel(
        config=config,
        features=features,
        is_training=mode == tf.estimator.ModeKeys.TRAIN,
        init_checkpoint=config.init_checkpoint)
    utils.log("Model is built!")
    # Tensors fetched and printed by the ETAHook every `log_every` steps.
    to_log = {
        "gen_loss": model.mlm_output.loss,
        "disc_loss": model.disc_output.loss,
        "total_loss": model.total_loss
    }
    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar('gen_loss', model.mlm_output.loss)
        tf.summary.scalar('disc_loss', model.disc_output.loss)
        tf.summary.scalar('total_loss', model.total_loss)
        # Linear learning-rate scaling with the Horovod world size.
        lr_multiplier = hvd.size() if config.scale_lr else 1
        train_op = optimization.create_optimizer(
            loss=model.total_loss,
            learning_rate=config.learning_rate * lr_multiplier,
            num_train_steps=config.num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            warmup_steps=config.num_warmup_steps,
            warmup_proportion=0,
            lr_decay_power=config.lr_decay_power,
            layerwise_lr_decay_power=-1,
            n_transformer_layers=None,
            hvd=hvd,
            use_fp16=config.use_fp16,
            num_accumulation_steps=config.num_accumulation_steps,
            allreduce_post_accumulation=config.allreduce_post_accumulation)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            train_op=train_op,
            training_hooks=[
                training_utils.ETAHook(
                    to_log=to_log,
                    n_steps=config.num_train_steps,
                    iterations_per_loop=config.iterations_per_loop,
                    on_tpu=False,
                    log_every=1,
                    is_training=True)
            ])
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            # Fix: tf.estimator.EstimatorSpec has no `eval_metrics` keyword
            # (that belongs to TPUEstimatorSpec), so the original raised a
            # TypeError in EVAL mode. The non-TPU field is `eval_metric_ops`.
            # NOTE(review): assumes model.eval_metrics is a dict of
            # name -> (value, update_op) pairs rather than a TPU-style
            # (metric_fn, tensors) tuple — confirm against PretrainingModel.
            eval_metric_ops=model.eval_metrics,
            evaluation_hooks=[
                training_utils.ETAHook(
                    to_log=to_log,
                    n_steps=config.num_eval_steps,
                    iterations_per_loop=config.iterations_per_loop,
                    on_tpu=False,
                    log_every=1,
                    is_training=False)
            ])
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported")
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the pretraining model, warm-starting from a checkpoint.

    Restores weights from the latest checkpoint found under
    `config.init_checkpoint`, logs the trainable variables, and returns
    a TPUEstimatorSpec. Only TRAIN and EVAL modes are supported.
    """
    model = PretrainingModel(config, features,
                             mode == tf.estimator.ModeKeys.TRAIN)
    utils.log("Model is built!")

    # Load pre-trained weights from the newest checkpoint in the directory.
    # (Fix: the original assigned `tvars = tf.trainable_variables()` twice;
    # the redundant first assignment is removed.)
    init_checkpoint = tf.train.latest_checkpoint(config.init_checkpoint)
    utils.log("Using checkpoint", init_checkpoint)
    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint))
        if config.use_tpu:
            # On TPU the restore must run inside the Scaffold factory.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()
            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    utils.log("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        utils.log(" name = %s, shape = %s%s", var.name, var.shape,
                  init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            model.total_loss, config.learning_rate, config.num_train_steps,
            weight_decay_rate=config.weight_decay_rate,
            use_tpu=config.use_tpu,
            warmup_steps=config.num_warmup_steps,
            lr_decay_power=config.lr_decay_power)
        output_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn,
            training_hooks=[training_utils.ETAHook(
                {} if config.use_tpu else dict(loss=model.total_loss),
                config.num_train_steps, config.iterations_per_loop,
                config.use_tpu)])
    elif mode == tf.estimator.ModeKeys.EVAL:
        output_spec = tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=model.total_loss,
            scaffold_fn=scaffold_fn,
            eval_metrics=model.eval_metrics,
            evaluation_hooks=[training_utils.ETAHook(
                {} if config.use_tpu else dict(loss=model.total_loss),
                config.num_eval_steps, config.iterations_per_loop,
                config.use_tpu, is_training=False)])
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported")
    return output_spec