def _build_train_graph(self):
    # Encode the knowledge candidates with a frozen BERT encoder
    # (is_training=False) on the first configured GPU.
    with tf.device('/gpu:%d' % self.hparams.gpu_num[0]):
        bert_model = modeling_base.BertModel(
            config=self.bert_config,
            is_training=False,
            input_ids=self.knowledge_ids_ph,
            input_mask=self.knowledge_mask_ph,
            token_type_ids=self.knowledge_seg_ids_ph,
            use_one_hot_embeddings=False,
            scope='bert',
            hparams=self.hparams)
        self.bert_sequence_outputs = bert_model.get_sequence_output()
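# Usage sketch (assumption): `knowledge_ids_ph`, `knowledge_mask_ph`, and
# `knowledge_seg_ids_ph` are int32 placeholders of shape [batch, seq_len]
# created in `_make_placeholders`. A minimal feed to pull the frozen
# knowledge encodings might look like this; placeholder names come from the
# graph above, the feed values are illustrative only.
#
#   seq_out = self.sess.run(
#       self.bert_sequence_outputs,
#       feed_dict={
#           self.knowledge_ids_ph: knowledge_ids,       # [batch, seq_len]
#           self.knowledge_mask_ph: knowledge_mask,     # 1 for real tokens, 0 for padding
#           self.knowledge_seg_ids_ph: knowledge_segs,  # all zeros for a single segment
#       })
#   # seq_out: [batch, seq_len, hidden_size]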
def analysis_evaluate(self, saved_file: str):
    """Restores a trained checkpoint into a fresh graph and runs evaluation."""
    # saved_file example
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
    )
    config.gpu_options.allow_growth = True

    self.train_setup_vars["do_evaluate"] = True
    self.train_setup_vars["is_train_continue"] = True

    tf.reset_default_graph()
    self.sess = tf.Session(config=config)
    self._make_placeholders()

    # self._build_train_graph()
    bert_model = modeling_base.BertModel(
        config=self.bert_config,
        is_training=False,
        input_ids=self.input_ids_ph,
        input_mask=self.input_mask_ph,
        token_type_ids=self.segment_ids_ph,
        dialog_position_ids=None,
        use_adapter_layer=False,
        scope='bert',
        hparams=self.hparams)

    input_phs = (self.input_ids_ph, self.input_mask_ph)
    length_phs = (self.dialog_len_ph, self.response_len_ph,
                  self.knowledge_len_ph)
    label_ids_phs = (self.label_ids_ph, self.knowledge_label_ids_ph)

    created_model = self.hparams.graph.Model(
        self.hparams, self.bert_config, bert_model,
        input_phs, label_ids_phs, length_phs, None, None)
    self.logits, self.seq_outputs = created_model.build_graph()

    self.sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(self.sess, saved_file)

    self._logger.info("Evaluation Step - Test")
    self.analysis_run_evaluate()
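# Usage sketch (assumption): `saved_file` is a TF 1.x checkpoint prefix, i.e.
# the path passed to `Saver.save`, not one of the `.index`/`.data` files.
# The instance name, directory, and step number below are hypothetical.
#
#   model.analysis_evaluate("runs/bert_base/model.ckpt-10000")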
def build_graph(self):
    is_training = not self.train_setup_vars["do_evaluate"]
    use_one_hot_embeddings = False

    input_ids, input_mask, segment_ids = self.input_phs

    # Load the BERT encoder. Note that train/eval behaviour is driven by the
    # dropout placeholder here rather than a static `is_training` flag.
    bert_model = modeling_base.BertModel(
        config=self.bert_config,
        dropout_prob=self.dropout_prob_ph,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        scope='bert',
        hparams=self.hparams)

    pooled_output = bert_model.get_pooled_output()
    return self._final_output_layer(pooled_output, self.label_id_phs)
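# `_final_output_layer` is defined elsewhere in the repo. A plausible sketch
# of such a classification head, assuming a single sparse label tensor and a
# softmax cross-entropy loss; this is illustrative, not the repo's actual
# implementation, and the `num_classes`/`dropout_prob` parameters are
# hypothetical:
def _final_output_layer_sketch(pooled_output, label_ids, num_classes,
                               dropout_prob):
    # Dropout on the pooled [CLS] vector, then a dense projection to logits.
    output = tf.nn.dropout(pooled_output, keep_prob=1.0 - dropout_prob)
    logits = tf.layers.dense(output, num_classes, name="cls_output")
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_ids, logits=logits))
    return logits, loss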
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling_base.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        use_domain_embeddings=True,
        scope="bert")

    (masked_lm_loss, masked_lm_example_loss,
     masked_lm_log_probs) = get_masked_lm_output(
         bert_config, model.get_sequence_output(), model.get_embedding_table(),
         masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    total_loss = masked_lm_loss + next_sentence_loss

    tvars = tf.trainable_variables()

    # fix_start
    # vars_in_checkpoint = tf.train.list_variables(init_checkpoint)
    # checkpoint_vars = []
    # for var_name, _ in vars_in_checkpoint:
    #     checkpoint_vars.append(var_name)
    #     print("pretrained_var : ", var_name)
    # fix_end

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling_utils.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights,
                      next_sentence_example_loss, next_sentence_log_probs,
                      next_sentence_labels):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(
                masked_lm_log_probs, axis=-1, output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            next_sentence_log_probs = tf.reshape(
                next_sentence_log_probs,
                [-1, next_sentence_log_probs.shape[-1]])
            next_sentence_predictions = tf.argmax(
                next_sentence_log_probs, axis=-1, output_type=tf.int32)
            next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
            next_sentence_accuracy = tf.metrics.accuracy(
                labels=next_sentence_labels,
                predictions=next_sentence_predictions)
            next_sentence_mean_loss = tf.metrics.mean(
                values=next_sentence_example_loss)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
                "next_sentence_accuracy": next_sentence_accuracy,
                "next_sentence_loss": next_sentence_mean_loss,
            }

        eval_metrics = (metric_fn, [
            masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
            masked_lm_weights, next_sentence_example_loss,
            next_sentence_log_probs, next_sentence_labels
        ])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % mode)

    return output_spec
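# `get_masked_lm_output` follows the original BERT pre-training code: it
# gathers the transformer outputs at the masked positions, projects them back
# onto the embedding table, and computes a weighted log-likelihood. The core
# gather step, sketched under the assumption of a [batch, seq, hidden] input
# (this mirrors `gather_indexes` from google-research/bert):
def gather_indexes(sequence_tensor, positions):
    """Gathers the vectors at `positions` over a minibatch."""
    batch_size = tf.shape(sequence_tensor)[0]
    seq_length = tf.shape(sequence_tensor)[1]
    width = sequence_tensor.shape[-1].value  # hidden size must be static

    # Turn per-example positions into indices into the flattened batch.
    flat_offsets = tf.reshape(
        tf.range(0, batch_size, dtype=tf.int32) * seq_length, [-1, 1])
    flat_positions = tf.reshape(positions + flat_offsets, [-1])
    flat_sequence = tf.reshape(sequence_tensor, [-1, width])
    return tf.gather(flat_sequence, flat_positions)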
def _build_train_graph(self):
    gpu_num = len(self.hparams.gpu_num)
    if gpu_num > 1:
        print("-" * 10, "Using %d Multi-GPU" % gpu_num, "-" * 10)
    else:
        print("-" * 10, "Using Single-GPU", "-" * 10)

    is_training = not self.train_setup_vars["do_evaluate"]
    use_one_hot_embeddings = False

    # Split each placeholder along the batch dimension, one shard per GPU.
    input_ids_ph = tf.split(self.input_ids_ph, gpu_num, 0)
    input_mask_ph = tf.split(self.input_mask_ph, gpu_num, 0)
    segment_ids_ph = tf.split(self.segment_ids_ph, gpu_num, 0)
    dialog_position_ids_ph = tf.split(self.dialog_position_ids_ph, gpu_num, 0)
    dialog_len_ph = tf.split(self.dialog_len_ph, gpu_num, 0)
    response_len_ph = tf.split(self.response_len_ph, gpu_num, 0)
    knowledge_tokens_ph = tf.split(self.knowledge_tokens_ph, gpu_num, 0)
    knowledge_len_ph = tf.split(self.knowledge_len_ph, gpu_num, 0)
    knowledge_label_ids_ph = tf.split(self.knowledge_label_ids_ph, gpu_num, 0)
    similar_input_ids_ph = tf.split(self.similar_input_ids_ph, gpu_num, 0)
    similar_input_mask_ph = tf.split(self.similar_input_mask_ph, gpu_num, 0)
    similar_len_ph = tf.split(self.similar_input_len_ph, gpu_num, 0)
    label_ids_ph = tf.split(self.label_ids_ph, gpu_num, 0)
    # is_real_examples_ph = tf.split(self.is_real_examples_ph, gpu_num, 0)

    tower_grads = []
    tot_losses = []
    tot_logits = []
    tot_labels = []
    tot_outputs = []
    tot_argmax = []
    tvars = []

    # Build one tower per GPU; variables are shared via AUTO_REUSE.
    for i, gpu_id in enumerate(self.hparams.gpu_num):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.variable_scope('', reuse=tf.AUTO_REUSE):
                print("bert_graph_multi_gpu :", gpu_id)
                if self.hparams.do_dialog_state_embedding:
                    each_dialog_position_ids = dialog_position_ids_ph[i]
                else:
                    each_dialog_position_ids = None

                bert_model = modeling_base.BertModel(
                    config=self.bert_config,
                    is_training=is_training,
                    input_ids=input_ids_ph[i],
                    input_mask=input_mask_ph[i],
                    token_type_ids=segment_ids_ph[i],
                    dialog_position_ids=each_dialog_position_ids,
                    use_adapter_layer=self.hparams.do_adapter_layer,
                    scope='bert',
                    hparams=self.hparams)

                input_phs = (input_ids_ph[i], input_mask_ph[i])
                length_phs = (dialog_len_ph[i], response_len_ph[i],
                              knowledge_len_ph[i])
                knowledge_phs = knowledge_tokens_ph[i]
                similar_phs = None
                if self.hparams.do_similar_dialog:
                    similar_phs = (similar_input_ids_ph[i],
                                   similar_input_mask_ph[i],
                                   similar_len_ph[i])
                label_ids_phs = (label_ids_ph[i], knowledge_label_ids_ph[i])

                created_model = self.hparams.graph.Model(
                    self.hparams, self.bert_config, bert_model, input_phs,
                    label_ids_phs, length_phs, knowledge_phs, similar_phs)
                logits, loss_op, seq_outputs, sentence_argmax = \
                    created_model.build_graph()

                tot_losses.append(loss_op)
                tot_logits.append(logits)
                tot_labels.append(label_ids_ph[i])
                tot_outputs.append(seq_outputs)
                tot_argmax.append(sentence_argmax)

                # Create the optimizer once, on the first tower.
                if i == 0:
                    self._select_train_variables()
                    if self.hparams.do_adam_weight_optimizer:
                        self.optimizer, self.global_step = \
                            optimization.create_optimizer(
                                loss_op, self.hparams.learning_rate,
                                self.num_train_steps, self.num_warmup_steps,
                                use_tpu=False)
                    else:
                        self.optimizer = tf.train.AdamOptimizer(
                            self.hparams.learning_rate)
                        self.global_step = tf.Variable(
                            0, name="global_step", trainable=False)

                if not self.hparams.do_train_bert:
                    if i == 0:
                        for var in tf.trainable_variables():
                            if var not in self.pretrained_not_train_var_names:
                                tvars.append(var)
                else:
                    tvars = tf.trainable_variables()

                if self.hparams.do_adam_weight_optimizer:
                    # This is how the model was pre-trained.
                    grads = tf.gradients(loss_op, tvars)
                    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
                    tower_grads.append(zip(grads, tvars))
                else:
                    grads = self.optimizer.compute_gradients(
                        loss_op, var_list=tvars)
                    tower_grads.append(grads)
                tf.get_variable_scope().reuse_variables()

    avg_grads = average_gradients(tower_grads)
    self.loss_op = tf.divide(tf.add_n(tot_losses), gpu_num)
    self.logits = tf.concat(tot_logits, axis=0)
    # Note: only the last tower's sequence outputs and argmax are kept here.
    self.sequence_outputs = seq_outputs
    self.sentence_argmax = sentence_argmax
    tot_labels = tf.concat(tot_labels, axis=0)

    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        self.train_op = self.optimizer.apply_gradients(
            avg_grads, self.global_step)
        # new_global_step = self.global_step + 1
        # self.train_op = tf.group(self.train_op, [self.global_step.assign(new_global_step)])

    if self.hparams.loss_type == "sigmoid":
        correct_pred = tf.equal(tf.round(tf.nn.sigmoid(self.logits)),
                                tf.cast(self.label_ids_ph, tf.float32))
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    else:
        # Renamed from `eval` to avoid shadowing the Python builtin.
        in_top_k = tf.nn.in_top_k(self.logits, self.label_ids_ph, 1)
        correct_count = tf.reduce_sum(tf.cast(in_top_k, tf.int32))
        self.accuracy = tf.divide(correct_count,
                                  tf.shape(self.label_ids_ph)[0])
    self.confidence = tf.nn.softmax(self.logits, axis=-1)

    if not self.train_setup_vars["do_evaluate"] \
            and not self.train_setup_vars["on_training"] \
            and not self.train_setup_vars["is_train_continue"]:
        self._initialize_uninitialized_variables()
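# `average_gradients` is the usual tower-averaging helper (as in the classic
# TensorFlow multi-GPU CIFAR-10 example). A minimal sketch, assuming every
# tower contributes (grad, var) pairs over the same variables in the same
# order; the name is suffixed to mark it as illustrative:
def average_gradients_sketch(tower_grads):
    """Averages per-tower gradients into one (grad, var) list."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars: ((grad_gpu0, var), (grad_gpu1, var), ...) across towers.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), 0)
        # Variables are shared across towers, so take the first tower's var.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads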
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    masked_lm_positions = features["masked_lm_positions"]
    masked_lm_ids = features["masked_lm_ids"]
    masked_lm_weights = features["masked_lm_weights"]
    next_sentence_labels = features["next_sentence_labels"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    model = modeling_base.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_adapter_layer=False,
        scope="bert")

    # The masked-LM head is disabled in this variant; only the
    # next-sentence objective is trained.
    # (masked_lm_loss, masked_lm_example_loss,
    #  masked_lm_log_probs) = get_masked_lm_output(
    #      bert_config, model.get_sequence_output(), model.get_embedding_table(),
    #      masked_lm_positions, masked_lm_ids, masked_lm_weights)

    (next_sentence_loss, next_sentence_example_loss,
     next_sentence_log_probs) = get_next_sentence_output(
         bert_config, model.get_pooled_output(), next_sentence_labels)

    # total_loss = masked_lm_loss + next_sentence_loss
    total_loss = next_sentence_loss  # TODO: next_sentence_loss

    # tf.summary.scalar('loss/masked_lm_loss', masked_lm_loss)
    tf.summary.scalar('loss/next_sentence_loss', next_sentence_loss)
    tf.summary.scalar('loss/total_loss', total_loss)

    tvars = tf.trainable_variables()
    # Renamed from `vars` to avoid shadowing the Python builtin.
    gvars = tf.global_variables()

    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling_utils.get_assignment_map_from_checkpoint(
             gvars, init_checkpoint)
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            for map_val in assignment_map:
                print(map_val)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        train_logging_hook = tf.train.LoggingTensorHook(
            {
                "loss": total_loss,
                # "masked_lm_loss": masked_lm_loss,
                "next_sentence_loss": next_sentence_loss
            },
            every_n_iter=100)
        # merge_all must be called, not passed as a function reference.
        summary_hook = tf.train.SummarySaverHook(
            save_steps=100,
            output_dir=FLAGS.output_dir,
            summary_op=tf.summary.merge_all())

        # Note: evaluation_hooks never fire in TRAIN mode; the summary hook
        # likely belongs in training_hooks.
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn,
            training_hooks=[train_logging_hook],
            evaluation_hooks=[summary_hook],
        )
    # The EVAL branch (masked-LM + next-sentence metrics) is commented out in
    # this variant; it is identical to `metric_fn` in the model_fn above.
    else:
        raise ValueError("Only TRAIN and EVAL modes are supported: %s" % mode)

    return output_spec
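# `get_next_sentence_output` follows the original BERT pre-training code: a
# simple binary classifier on the pooled [CLS] vector. A sketch mirroring
# google-research/bert (variable names match the public checkpoints; the
# name is suffixed to mark it as illustrative):
def get_next_sentence_output_sketch(bert_config, input_tensor, labels):
    """Gets loss and log probs for the next-sentence prediction task."""
    with tf.variable_scope("cls/seq_relationship"):
        # Binary classification: 0 = is next sentence, 1 = is random sentence.
        output_weights = tf.get_variable(
            "output_weights",
            shape=[2, bert_config.hidden_size],
            initializer=tf.truncated_normal_initializer(
                stddev=bert_config.initializer_range))
        output_bias = tf.get_variable(
            "output_bias", shape=[2], initializer=tf.zeros_initializer())

        logits = tf.matmul(input_tensor, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        labels = tf.reshape(labels, [-1])
        one_hot_labels = tf.one_hot(labels, depth=2, dtype=tf.float32)
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, log_probs)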