def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the PREDICT-only spec exposing selected BERT encoder layers.

    Args:
        features: Mapping that provides the "unique_ids", "input_ids",
            "input_mask" and "input_type_ids" entries.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; only PREDICT is accepted.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec` whose predictions hold "unique_id" plus one
        "layer_output_<i>" entry for every index in `layer_indexes`.

    Raises:
        ValueError: If `mode` is anything other than PREDICT.
    """
    unique_ids, input_ids, input_mask, input_type_ids = (
        features[key]
        for key in ("unique_ids", "input_ids", "input_mask", "input_type_ids"))

    bert = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    trainable = tf.trainable_variables()
    ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
        trainable, init_checkpoint)

    scaffold_fn = None
    if not use_tpu:
        tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
    else:
        # On TPU the checkpoint initialisation is deferred to the scaffold.
        def tpu_scaffold():
            tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
            return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    encoder_layers = bert.get_all_encoder_layers()
    predictions = {"unique_id": unique_ids}
    predictions.update(
        ("layer_output_%d" % position, encoder_layers[layer_index])
        for position, layer_index in enumerate(layer_indexes))

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build a prediction spec that returns the classifier probabilities.

    Args:
        features: Mapping that provides "input_ids", "input_mask" and
            "segment_ids"; every entry must expose a `.shape` for logging.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; it only decides the
            `is_training` flag handed to `create_model`.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec` whose predictions are
        `{"probabilities": probabilities}` and whose scaffold is unset.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    training = mode == tf.estimator.ModeKeys.TRAIN
    _logits, probabilities = create_model(
        bert_config, training, input_ids, input_mask, segment_ids, num_labels)

    trainable = tf.trainable_variables()
    restored_names = {}
    if init_checkpoint:
        ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=None)
def load_model(bert_config, init_checkpoint: Union[str, None],
               layer_indexes: List[int], input_ids, input_mask,
               input_type_ids, is_training: bool = False,
               use_one_hot_embeddings: bool = False, scope: str = None):
    """Instantiate BERT, optionally restore a checkpoint, return encoder output.

    Args:
        bert_config: Configuration object forwarded to `modeling.BertModel`.
        init_checkpoint: Checkpoint to initialise from, or None to skip.
        layer_indexes: Indexes into the list of encoder layers. A single
            index returns that layer; otherwise the selected layers are
            concatenated along the last axis.
        input_ids: Forwarded to `BertModel` as `input_ids`.
        input_mask: Forwarded to `BertModel` as `input_mask`.
        input_type_ids: Forwarded to `BertModel` as `token_type_ids`.
        is_training: Forwarded to `BertModel`.
        use_one_hot_embeddings: Forwarded to `BertModel`.
        scope: Forwarded to `BertModel`.

    Returns:
        The selected encoder layer, or the concatenation of the selected
        layers.
    """
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        scope=scope,
    )

    trainable = tf.trainable_variables()
    restored_names = {}
    checkpoint_given = init_checkpoint is not None

    if checkpoint_given:
        ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
    else:
        tf.logging.info("No checkpoint was loaded.")

    # Pick the requested layer, or join several of them feature-wise.
    encoder_layers = bert.get_all_encoder_layers()
    if len(layer_indexes) == 1:
        output_layer = encoder_layers[layer_indexes[0]]
    else:
        chosen = [encoder_layers[index] for index in layer_indexes]
        output_layer = tf.concat(chosen, -1)

    # Log which variables were covered by the checkpoint.
    if checkpoint_given:
        tf.logging.info("*** Trainable Variables ***")
        for variable in trainable:
            marker = (", *INIT_FROM_CKPT*"
                      if variable.name in restored_names else "")
            tf.logging.info(" name = %s, shape = %s%s", variable.name,
                            variable.shape, marker)

    return output_layer
def model_fn(features, labels, mode, params):
    """Build a spec that returns the predicted label ids.

    The checkpoint is now initialised exactly once: inside the scaffold
    function on TPU, directly otherwise. Previously an extra unconditional
    `tf.train.init_from_checkpoint` call ran before that branch, so the
    initialisation was repeated off-TPU and also ran outside the scaffold
    on TPU.

    Args:
        features: Mapping that provides "input_ids", "input_mask",
            "segment_ids" and "label_ids".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; it decides the `is_training`
            flag handed to `create_model`.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec` with `predictions=pred_ids`, whatever the mode.
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN
    # The loss is computed by create_model but is not part of the spec.
    _total_loss, pred_ids = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    logging.info("**** Trainable Variables ****")

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Build the training `EstimatorSpec` for the BERT classifier.

    Only TRAIN mode produces a spec. Other modes used to fall through
    without ever binding `output_spec`; they are now rejected up front
    with an explicit error.

    Args:
        features: Mapping that provides "input_ids", "input_mask",
            "segment_ids" and "label_ids", and optionally
            "is_real_example".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; must be TRAIN.
        params: Unused.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying the total loss and the
        training op.

    Raises:
        ValueError: If `mode` is not TRAIN.
    """
    if mode != tf.estimator.ModeKeys.TRAIN:
        raise ValueError("Only TRAIN mode is supported: %s" % (mode))

    input_ids = features['input_ids']
    input_mask = features['input_mask']
    segment_ids = features['segment_ids']
    label_ids = features['label_ids']

    # Not consumed by the training spec below; kept from the original code.
    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

    print(label_ids)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    (total_loss, per_example_loss, logits, probabilities) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    if init_checkpoint:
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    train_op = optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the PREDICT-only spec exposing BERT's pooled output.

    Args:
        features: Mapping that provides the "unique_ids", "input_ids",
            "input_mask" and "input_type_ids" entries.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; only PREDICT is accepted.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec` whose predictions hold "unique_id" and
        "output_layer" (the model's pooled output); no scaffold is set.

    Raises:
        ValueError: If `mode` is anything other than PREDICT.
    """
    unique_ids, input_ids, input_mask, input_type_ids = (
        features[key]
        for key in ("unique_ids", "input_ids", "input_mask", "input_type_ids"))

    bert = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=False)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    trainable = tf.trainable_variables()
    ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
        trainable, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    predictions = {
        "unique_id": unique_ids,
        "output_layer": bert.get_pooled_output(),
    }
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=None)
def __init__(self, bert_config, num_labels, seq_length, init_checkpoint):
    """Create the input placeholders, build BERT and restore its weights.

    Args:
        bert_config: Configuration object forwarded to `modeling.BertModel`.
        num_labels: Stored on the instance as `self.num_labels`.
        seq_length: Second dimension of the three token-level placeholders.
        init_checkpoint: Checkpoint to initialise from; skipped when falsy.
    """
    self.bert_config = bert_config
    self.num_labels = num_labels
    self.seq_length = seq_length

    # Token-level inputs share the [batch, seq_length] placeholder shape.
    token_shape = [None, self.seq_length]
    self.input_ids = tf.placeholder(tf.int32, token_shape, name='input_ids')
    self.input_mask = tf.placeholder(tf.int32, token_shape, name='input_mask')
    self.segment_ids = tf.placeholder(tf.int32, token_shape,
                                      name='segment_ids')
    self.labels = tf.placeholder(tf.int32, [None], name='labels')
    self.is_training = tf.placeholder(tf.bool, name='is_training')
    self.learning_rate = tf.placeholder(tf.float32, name='learn_rate')

    # NOTE(review): `is_training` is handed over as a tf.bool placeholder;
    # confirm this project's `modeling.BertModel` accepts a tensor here.
    self.model = modeling.BertModel(
        config=self.bert_config,
        is_training=self.is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask,
        token_type_ids=self.segment_ids)

    trainable = tf.trainable_variables()
    restored_names = {}
    if init_checkpoint:
        ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    self.inference()
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the PREDICT-only spec that returns sentence encodings.

    Args:
        features: Mapping that provides the "client_id", "input_ids",
            "input_mask" and "input_type_ids" entries.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value; only PREDICT is accepted.
        params: Unused.

    Returns:
        An `EstimatorSpec` whose predictions hold 'client_id' and 'encodes'
        (the result of `model.get_sentence_encoding()`).

    Raises:
        ValueError: If `mode` is anything other than PREDICT.
    """
    client_id, input_ids, input_mask, input_type_ids = (
        features[key]
        for key in ("client_id", "input_ids", "input_mask", "input_type_ids"))

    bert = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))

    # The names of the restored variables are not needed here.
    ckpt_map, _restored_names = modeling.get_assignment_map_from_checkpoint(
        tf.trainable_variables(), init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)

    predictions = {
        'client_id': client_id,
        'encodes': bert.get_sentence_encoding(),
    }
    return EstimatorSpec(mode=mode, predictions=predictions)
def build_model(self):
    """Build the graph, restore BERT weights, and create the train op/saver.

    The model is now created *before* the trainable variables are
    collected. `tf.train.init_from_checkpoint` can only rewire variables
    that already exist, so collecting them ahead of `self.create_model()`
    (as the code did before) silently skips every variable that
    `create_model` introduces.

    NOTE(review): if the BERT variables are already created elsewhere
    before this method runs, the reordering leaves the set of restored
    BERT variables unchanged - confirm against the rest of the class.

    Side effects:
        Sets `self.loss`, `self.pred_y`, `self.train_op` and `self.saver`.
    """
    # Build the model from the instance's parameters. (Original note:
    # input_idx is the index representation of the input samples and
    # label_ids is the index representation of the labels.)
    (loss, _logits, pred_y) = self.create_model()
    self.loss = loss
    self.pred_y = pred_y

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    # Map the graph's trainable variables onto the checkpoint and make
    # them initialise from it. (Original note: assignment_map and
    # initialized_variable_names are keyed by variable name.)
    if self.init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, self.init_checkpoint)
        tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    # Debug output kept from the original implementation.
    print(loss)
    print(FLAGS.learning_rate)
    print(self.num_train_steps)
    print(self.num_warmup_steps)

    self.train_op = optimization.create_optimizer(
        loss, FLAGS.learning_rate, self.num_train_steps,
        self.num_warmup_steps, use_tpu=False)
    self.saver = tf.train.Saver(tf.global_variables())
def __bert_embedding(self, token_ids, token_masks, segment_ids, masks,
                     keep_prob=0.8):
    """Compute BERT embeddings for the given token inputs.

    Dropout is applied to the embeddings only while training. Previously
    it was applied unconditionally, so a model built with
    `self.is_training` false still dropped activations at the default
    `keep_prob` of 0.8.

    Args:
        token_ids: Forwarded to `BertModel` as `input_ids`.
        token_masks: Forwarded to `BertModel` as `input_mask`.
        segment_ids: Forwarded to `BertModel` as `token_type_ids`.
        masks: Unused; kept for interface compatibility.
        keep_prob: Keep probability for the dropout applied in training.

    Returns:
        The model's sequence output, with dropout applied when
        `self.is_training` is truthy.
    """
    from bert import modeling

    bert_model = modeling.BertModel(
        config=self.bert_config,
        is_training=self.is_training,
        input_ids=token_ids,
        input_mask=token_masks,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False)
    # Per the original author's note:
    # (batch_size, bert_max_seq_length, bert_embedding_size)
    bert_embeddings = bert_model.get_sequence_output()

    # Initialise from the pre-trained BERT checkpoint (training only).
    if self.is_training and self.bert_init_checkpoint:
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, self.bert_init_checkpoint)
        tf.train.init_from_checkpoint(self.bert_init_checkpoint,
                                      assignment_map)
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

    if not self.is_training:
        return bert_embeddings
    return tf.nn.dropout(bert_embeddings, keep_prob)
def __init__(self, bert_config):
    """Create the placeholders, build BERT and wire up the masked-LM loss.

    Args:
        bert_config: Configuration object forwarded to `modeling.BertModel`.

    Side effects:
        Initialises the trainable variables from `BERT_INIT_CHKPNT` and logs
        each of them.
    """
    self.bert_config = bert_config

    sequence_shape = [None, SEQ_LEN]
    prediction_shape = [None, MAX_PREDICTIONS_PER_SEQ]
    self.input_ids = tf.placeholder(
        shape=sequence_shape, dtype=tf.int32, name="input_ids")
    self.input_mask = tf.placeholder(
        shape=sequence_shape, dtype=tf.int32, name="input_mask")
    self.token_type = tf.placeholder(
        shape=sequence_shape, dtype=tf.int32, name="segment_ids")
    self.masked_lm_positions = tf.placeholder(
        shape=prediction_shape, dtype=tf.int32, name="masked_lm_positions")
    self.masked_lm_ids = tf.placeholder(
        shape=prediction_shape, dtype=tf.int32, name="masked_lm_ids")

    bert = modeling.BertModel(
        config=self.bert_config,
        is_training=False,
        input_ids=self.input_ids,
        input_mask=self.input_mask,
        token_type_ids=self.token_type,
        use_one_hot_embeddings=False)

    # Both attributes are set before get_masked_lm_output() is invoked.
    self.input_tensor = bert.get_sequence_output()
    self.output_weights = bert.get_embedding_table()
    self.masked_lm_example_loss = self.get_masked_lm_output()

    trainable = tf.trainable_variables()
    ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
        trainable, BERT_INIT_CHKPNT)
    tf.train.init_from_checkpoint(BERT_INIT_CHKPNT, ckpt_map)

    tf.compat.v1.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.compat.v1.logging.info(" name = %s, shape = %s%s", variable.name,
                                  variable.shape, marker)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the train/eval/predict spec for the CPC (+ optional MLM) model.

    Args:
        features: Mapping that provides "input_ids", "input_mask",
            "segment_ids", "label_types" and "label_ids"; when `add_masking`
            is set it must also provide "mask_indices", "target_token_ids"
            and "target_token_weights".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value selecting which spec is built.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec`: loss + train op for TRAIN, loss + eval metrics
        for EVAL, and `{"probabilities": ...}` predictions otherwise.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    sequence_shape = [-1, FLAGS.max_seq_length]
    input_ids = tf.reshape(features["input_ids"], sequence_shape)
    input_mask = tf.reshape(features["input_mask"], sequence_shape)
    segment_ids = tf.reshape(features["segment_ids"], sequence_shape)

    label_types = features["label_types"]
    label_ids = features["label_ids"]

    training = mode == tf.estimator.ModeKeys.TRAIN

    one_hot_types = tf.one_hot(label_types, FLAGS.k_size * 2)
    is_real_example = tf.reduce_sum(one_hot_types, axis=1)

    bert = modeling.BertModel(
        config=bert_config,
        is_training=training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    cpc_loss, _, logits, probabilities = bilin_model_builder.create_model(
        bert, label_ids, label_types, num_choices, k_size=FLAGS.k_size)

    if not add_masking:
        total_loss = cpc_loss
        masked_lm_loss = tf.constant([0])
    else:
        mask_rate = FLAGS.mask_rate  # search alternatives?
        max_predictions_per_seq = int(
            math.ceil(FLAGS.max_seq_length * mask_rate))
        prediction_shape = [-1, max_predictions_per_seq]
        masked_lm_positions = tf.reshape(features["mask_indices"],
                                         prediction_shape)
        masked_lm_ids = tf.reshape(features["target_token_ids"],
                                   prediction_shape)
        masked_lm_weights = tf.reshape(features["target_token_weights"],
                                       prediction_shape)
        masked_lm_loss, _, _ = bilin_model_builder.get_masked_lm_output(
            bert_config, bert.get_sequence_output(),
            bert.get_embedding_table(), masked_lm_positions, masked_lm_ids,
            masked_lm_weights)
        total_loss = cpc_loss + masked_lm_loss

    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
        else:
            # On TPU the checkpoint initialisation is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(cpc_loss, mlm_loss, label_ids, logits, is_real_example):
            """Return overall accuracy, both mean losses and per-slot accuracy."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            metric_dict = {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=label_ids,
                    predictions=predictions,
                    weights=is_real_example),
                "eval_cpc_loss": tf.metrics.mean(values=cpc_loss),
                "eval_mlm_loss": tf.metrics.mean(values=mlm_loss),
            }
            # One extra accuracy metric per column of labels/predictions.
            for slot in range(FLAGS.k_size * 2):
                metric_dict["acc" + str(slot)] = tf.metrics.accuracy(
                    labels=label_ids[:, slot],
                    predictions=predictions[:, slot],
                    weights=is_real_example[:, slot])
            return metric_dict

        metric_inputs = [
            cpc_loss, masked_lm_loss, label_ids, logits, is_real_example
        ]
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the train/eval/predict spec for predicate + token labelling.

    The evaluation metrics no longer contain the raw `predicate_prediction`
    tensor. Estimator metrics must be `(value_op, update_op)` pairs, so
    returning a bare tensor under the "predicate_prediction" key made EVAL
    fail when the spec was validated.

    Args:
        features: Mapping that provides "input_ids", "input_mask",
            "segment_ids", "token_label_ids" and "predicate_label_id", and
            optionally "is_real_example".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value selecting which spec is built.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec`: loss + train op for TRAIN, loss + eval metrics
        for EVAL, and the predicate/token predictions otherwise.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    token_label_ids = features["token_label_ids"]
    predicate_label_id = features["predicate_label_id"]

    if "is_real_example" in features:
        is_real_example = tf.cast(features["is_real_example"],
                                  dtype=tf.float32)
    else:
        is_real_example = tf.ones(tf.shape(token_label_ids),
                                  dtype=tf.float32)  # TO DO

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, predicate_loss, predicate_per_example_loss,
     predicate_probabilities, predicate_prediction, token_label_loss,
     token_label_per_example_loss, token_label_logits,
     token_label_predictions) = create_model(
         bert_config, is_training, input_ids, input_mask, segment_ids,
         token_label_ids, predicate_label_id, num_token_labels,
         num_predicate_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:
            # On TPU the checkpoint initialisation is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(predicate_loss, token_label_per_example_loss,
                      token_label_ids, token_label_logits, is_real_example):
            """Return the token precision/recall/F metrics and both mean losses."""
            token_label_predictions = tf.argmax(
                token_label_logits, axis=-1, output_type=tf.int32)

            # Per the original comments, the first four label ids are
            # ["[Padding]", "[##WordPiece]", "[CLS]", "[SEP]"] and the last
            # one is "O"; none of them count as positive classes.
            pos_indices_list = list(range(num_token_labels))[4:-1]

            metrics = {}
            for average in ("macro", "micro"):
                for label, metric in (("precision", tf_metrics.precision),
                                      ("recall", tf_metrics.recall),
                                      ("f", tf_metrics.f1)):
                    key = "eval_token_label_%s(%s)" % (label, average)
                    metrics[key] = metric(
                        token_label_ids, token_label_predictions,
                        num_token_labels, pos_indices_list, average=average)

            metrics["eval_token_label_loss"] = tf.metrics.mean(
                values=token_label_per_example_loss, weights=is_real_example)
            metrics["eval_predicate_loss"] = tf.metrics.mean(
                values=predicate_loss)
            return metrics

        eval_metrics = (metric_fn, [
            predicate_loss, token_label_per_example_loss, token_label_ids,
            token_label_logits, is_real_example
        ])
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={
            "predicate_probabilities": predicate_probabilities,
            "predicate_prediction": predicate_prediction,
            "token_label_predictions": token_label_predictions
        },
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the train/eval/predict spec for joint token/sentence labelling.

    Args:
        features: Mapping that provides "input_ids", "input_masks" and
            "segment_ids"; "token_label_ids" and "sent_label_ids" are read
            only in TRAIN and EVAL modes.
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value selecting which spec is built.
        params: Unused.

    Returns:
        A `TPUEstimatorSpec`: loss + train op for TRAIN, loss + eval metrics
        for EVAL, and the "token_predict"/"sent_predict" predictions
        otherwise.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    input_ids = features["input_ids"]
    input_masks = features["input_masks"]
    segment_ids = features["segment_ids"]

    # Label features are only looked up when the mode supplies them.
    has_labels = mode in (tf.estimator.ModeKeys.TRAIN,
                          tf.estimator.ModeKeys.EVAL)
    token_label_ids = features["token_label_ids"] if has_labels else None
    sent_label_ids = features["sent_label_ids"] if has_labels else None

    loss, token_predict_ids, sent_predict_ids = create_model(
        bert_config, input_ids, input_masks, segment_ids, token_label_ids,
        sent_label_ids, token_label_list, sent_label_list, mode, use_tpu)

    trainable = tf.trainable_variables()
    restored_names = {}
    scaffold_fn = None
    if init_checkpoint:
        ckpt_map, restored_names = modeling.get_assignment_map_from_checkpoint(
            trainable, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
        else:
            # On TPU the checkpoint initialisation is deferred to the scaffold.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, ckpt_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable:
        marker = ", *INIT_FROM_CKPT*" if variable.name in restored_names else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, marker)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=loss, train_op=train_op, scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(token_label_ids, sent_label_ids, token_predict_ids,
                      sent_predict_ids):
            """Return token precision/recall and sentence accuracy."""
            return {
                "token_precision": tf.metrics.precision(
                    labels=token_label_ids, predictions=token_predict_ids),
                "token_recall": tf.metrics.recall(
                    labels=token_label_ids, predictions=token_predict_ids),
                "sent_accuracy": tf.metrics.accuracy(
                    labels=sent_label_ids, predictions=sent_predict_ids),
            }

        # Token labels and predictions go through get_masked_data first.
        masked_token_label_ids = get_masked_data(token_label_ids,
                                                 token_label_list)
        masked_token_predict_ids = get_masked_data(token_predict_ids,
                                                   token_label_list)
        metric_inputs = [
            masked_token_label_ids, sent_label_ids, masked_token_predict_ids,
            sent_predict_ids
        ]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={
            "token_predict": token_predict_ids,
            "sent_predict": sent_predict_ids
        },
        scaffold_fn=scaffold_fn)
def main(_):
    """Search for discrete token sequences that satisfy a classifier objective.

    The function builds a BERT classifier graph on top of one-hot encoded
    inputs, computes the gradient of the objective with respect to the one-hot
    input (the HotFlip directional derivative), and then repeatedly flips
    tokens of each input template (randomly, greedily or with beam search)
    until `evaluate_stopping` reports success or `FLAGS.total_steps` is
    exhausted. Sequences that still satisfy the objective after a round trip
    through detokenisation are kept; the rest are retried for up to 10
    iterations. If `FLAGS.output_file` is set, the resulting sentences are
    written there as a TSV file.

    Raises:
        ValueError: If `--input_file_processor`, `--obj_type` or
            `--flipping_mode` has an unsupported value.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.input_file_processor == "run_classifier":
        processors = {
            "sst-2": rc.SST2Processor,
            "mnli": rc.MnliProcessor,
        }
    elif FLAGS.input_file_processor == "run_classifier_distillation":
        processors = {
            "sst-2": rc.SST2ProcessorDistillation,
            "mnli": rc.MNLIProcessorDistillation,
        }
    else:
        raise ValueError("Invalid --input_file_processor flag value")

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    task_name = FLAGS.task_name.lower()
    processor = processors[task_name]()
    label_list = processor.get_labels()
    num_labels = len(label_list)

    input_ids_placeholder = tf.placeholder(
        dtype=tf.int32, shape=[None, FLAGS.max_seq_length])
    bert_input_mask_placeholder = tf.placeholder(
        dtype=tf.int32, shape=[None, FLAGS.max_seq_length])
    token_type_ids_placeholder = tf.placeholder(
        dtype=tf.int32, shape=[None, FLAGS.max_seq_length])
    prob_vector_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=[None, num_labels])

    one_hot_input_ids = tf.one_hot(input_ids_placeholder,
                                   depth=bert_config.vocab_size)

    input_tensor, _ = em_util.run_one_hot_embeddings(
        one_hot_input_ids=one_hot_input_ids, config=bert_config)

    flex_input_obj, per_eg_obj, probs = em_util.model_fn(
        input_tensor=input_tensor,
        bert_input_mask=bert_input_mask_placeholder,
        token_type_ids=token_type_ids_placeholder,
        bert_config=bert_config,
        num_labels=num_labels,
        obj_type=FLAGS.obj_type,
        prob_vector=prob_vector_placeholder)

    if FLAGS.obj_type.startswith("min"):
        final_obj = -1 * flex_input_obj
    elif FLAGS.obj_type.startswith("max"):
        final_obj = flex_input_obj
    else:
        # Without this branch `final_obj` would be unbound and the failure
        # would only surface as a NameError in tf.gradients below.
        raise ValueError("Invalid --obj_type flag value")

    # Calculate the gradient of the final loss function with respect to
    # the one-hot input space
    grad_obj_one_hot = tf.gradients(ys=final_obj, xs=one_hot_input_ids)[0]

    # gradients with respect to position in one hot input space with 1s in it
    # this is one term in the directional derivative of HotFlip,
    # Eq1 in https://arxiv.org/pdf/1712.06751.pdf
    #
    # grad_obj_one_hot.shape = [batch_size, seq_length, vocab_size]
    # input_ids_placeholder.shape = [batch_size, seq_length]
    # original_token_gradients.shape = [batch_size, seq_length]
    original_token_gradients = tf.gather(
        params=grad_obj_one_hot,
        indices=tf.expand_dims(input_ids_placeholder, -1),
        batch_dims=2)
    original_token_gradients = tf.tile(original_token_gradients,
                                       multiples=[1, 1, FLAGS.beam_size])

    # These are the gradients / indices whose one-hot position has the largest
    # gradient magnitude, the performs part of the max calculation in Eq10 of
    # https://arxiv.org/pdf/1712.06751.pdf
    biggest_gradients, biggest_indices = tf.nn.top_k(input=grad_obj_one_hot,
                                                     k=FLAGS.beam_size)

    # Eq10 of https://arxiv.org/pdf/1712.06751.pdf
    grad_difference = biggest_gradients - original_token_gradients

    tvars = tf.trainable_variables()
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tvars, FLAGS.init_checkpoint)
    tf.logging.info("Variables mapped = %d / %d", len(assignment_map),
                    len(tvars))
    tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    if FLAGS.input_file:
        custom_examples = processor.get_custom_examples(FLAGS.input_file)
        custom_templates = [
            em_util.input_to_template(x, label_list) for x in custom_examples
        ]
    else:
        prob_vector = [float(x) for x in FLAGS.prob_vector.split(",")]
        custom_templates = [(FLAGS.input_template, prob_vector)]

    # The number of "[SEP]" markers in the first template decides whether the
    # task is treated as single-sentence or sentence-pair further down.
    num_input_sequences = custom_templates[0][0].count("[SEP]")

    if FLAGS.flipping_mode == "beam_search":
        FLAGS.batch_size = 1

    detok_partial = functools.partial(em_util.detokenize, tokenizer=tokenizer)

    # Since input files will often be quite large, this flag allows processing
    # only a slice of the input file
    if FLAGS.input_file_range:
        start_index, end_index = FLAGS.input_file_range.split("-")
        if start_index == "start":
            start_index = 0
        if end_index == "end":
            end_index = len(custom_templates)
        start_index, end_index = int(start_index), int(end_index)
    else:
        start_index = 0
        end_index = len(custom_templates)

    tf.logging.info("Processing examples in range %d, %d", start_index,
                    end_index)

    all_elements = []
    too_long = 0

    for ip_num, (ip_template, prob_vector) in enumerate(
            custom_templates[start_index:end_index]):
        # Parse the input template into a list of IDs and the corresponding
        # mask. Different segments in template are separated by " <piece> "
        # Each segment is associated with a word piece (or [EMPTY] to get flex
        # inputs) and a frequency. (which is separated by "<freq>"). * can be
        # used to choose a frequency till the end of the string
        #
        # Here is an example 2-sequence template for tasks like MNLI to
        # optimize 20 vectors, (10 for each sequence)
        # [CLS]<freq>1 <piece> [EMPTY]<freq>10 <piece> [SEP]<freq>1 <piece> \
        # [EMPTY]<freq>10 <piece> [SEP]<freq>1 <piece> [PAD]<freq>*
        (input_ids, input_mask, bert_input_mask,
         token_type_ids) = em_util.template_to_ids(
             template=ip_template,
             config=bert_config,
             tokenizer=tokenizer,
             max_seq_length=FLAGS.max_seq_length)

        if len(input_ids) > FLAGS.max_seq_length:
            # truncate them!
            input_ids = input_ids[:FLAGS.max_seq_length]
            input_mask = input_mask[:FLAGS.max_seq_length]
            bert_input_mask = bert_input_mask[:FLAGS.max_seq_length]
            token_type_ids = token_type_ids[:FLAGS.max_seq_length]
            too_long += 1

        all_elements.append({
            "input_ids": input_ids,
            # Independent copy so failed elements can be reset later.
            "original_input_ids": [ii for ii in input_ids],
            "ip_num": start_index + ip_num,
            "score": 0.0,
            "bert_input_mask": bert_input_mask,
            "input_mask": input_mask,
            "token_type_ids": token_type_ids,
            "prob_vector": prob_vector,
            "stopped": False,
            "steps_taken": 0
        })

    tf.logging.info("%d / %d were too long and hence truncated.", too_long,
                    len(all_elements))

    iteration_number = 0
    consistent_output_sequences = []
    # Bound before the loop so the "Giving up" report below still works when
    # the selected slice is empty and the loop body never runs.
    failures = []

    while all_elements and iteration_number < 10:
        steps_taken = []
        output_sequences = []
        failures = []
        zero_step_instances = 0

        iteration_number += 1
        tf.logging.info("Starting iteration number %d", iteration_number)
        tf.logging.info("Pending items = %d / %d", len(all_elements),
                        len(custom_templates[start_index:end_index]))

        batch_elements = []
        for ip_num, input_object in enumerate(all_elements):
            batch_elements.append(input_object)
            # wait until the input has populated up to the batch size
            if (len(batch_elements) < FLAGS.batch_size and
                    ip_num < len(all_elements) - 1):
                continue

            # optimize a part of the flex_input (depending on the template)
            for step_num in range(FLAGS.total_steps):
                feed_dict = {
                    input_ids_placeholder:
                        np.array([x["input_ids"] for x in batch_elements]),
                    bert_input_mask_placeholder:
                        np.array(
                            [x["bert_input_mask"] for x in batch_elements]),
                    token_type_ids_placeholder:
                        np.array(
                            [x["token_type_ids"] for x in batch_elements]),
                    prob_vector_placeholder:
                        np.array([x["prob_vector"] for x in batch_elements])
                }
                if FLAGS.flipping_mode == "random":
                    # Avoiding the gradient computation when the flipping mode
                    # is random
                    peo, pr = sess.run([per_eg_obj, probs],
                                       feed_dict=feed_dict)
                else:
                    peo, gd, bi, pr = sess.run(
                        [per_eg_obj, grad_difference, biggest_indices, probs],
                        feed_dict=feed_dict)

                if FLAGS.print_flips:
                    output_log = "\n" + "\n".join([
                        "Objective = %.4f, Score = %.4f, Element %d = %s" %
                        (obj, elem["score"], kk,
                         detok_partial(elem["input_ids"]))
                        for kk, (obj, elem) in enumerate(
                            zip(peo, batch_elements))
                    ])
                    tf.logging.info("Step = %d %s\n", step_num, output_log)

                should_stop = evaluate_stopping(
                    stopping_criteria=FLAGS.stopping_criteria,
                    obj_prob_vector=np.array(
                        [x["prob_vector"] for x in batch_elements]),
                    curr_prob_vector=pr,
                    per_example_objective=peo)

                for elem, stop_bool in zip(batch_elements, should_stop):
                    if stop_bool and (not elem["stopped"]):
                        if step_num == 0:
                            # don't actually stop the perturbation since we
                            # want a new input
                            zero_step_instances += 1
                        else:
                            elem["stopped"] = True
                            elem["steps_taken"] = step_num

                if np.all([elem["stopped"] for elem in batch_elements]):
                    steps_taken.extend(
                        [elem["steps_taken"] for elem in batch_elements])
                    output_sequences.extend([elem for elem in batch_elements])
                    batch_elements = []
                    break

                if step_num == FLAGS.total_steps - 1:
                    # Step budget exhausted: split the batch into failures and
                    # the elements that did stop.
                    failures.extend([
                        elem for elem in batch_elements if not elem["stopped"]
                    ])
                    steps_taken.extend([
                        elem["steps_taken"]
                        for elem in batch_elements
                        if elem["stopped"]
                    ])
                    output_sequences.extend(
                        [elem for elem in batch_elements if elem["stopped"]])
                    batch_elements = []
                    break

                # Flip a token / word-piece either systematically or randomly
                # For instances where hotflip was not successful, do some
                # random perturbations before doing hotflip
                if (FLAGS.flipping_mode == "random" or
                        (iteration_number > 1 and
                         step_num < iteration_number)):
                    for element in batch_elements:
                        # don't perturb elements which have stopped
                        if element["stopped"]:
                            continue
                        random_seq_index = np.random.choice([
                            ii for ii, mask_id in enumerate(
                                element["input_mask"]) if mask_id > 0.5
                        ])
                        random_token_id = np.random.randint(
                            len(tokenizer.vocab))
                        # Resample while the token is of the form "[...]".
                        while (tokenizer.inv_vocab[random_token_id][0] == "["
                               and
                               tokenizer.inv_vocab[random_token_id][-1] == "]"):
                            random_token_id = np.random.randint(
                                len(tokenizer.vocab))
                        element["input_ids"][
                            random_seq_index] = random_token_id
                elif FLAGS.flipping_mode == "greedy":
                    batch_elements = greedy_updates(
                        old_elements=batch_elements,
                        grad_difference=gd,
                        biggest_indices=bi,
                        max_seq_length=FLAGS.max_seq_length)
                elif FLAGS.flipping_mode == "beam_search":
                    # only supported with a batch size of 1!
                    batch_elements = beam_search(
                        old_beams=batch_elements,
                        grad_difference=gd,
                        biggest_indices=bi,
                        beam_size=FLAGS.beam_size,
                        accumulate_scores=FLAGS.accumulate_scores,
                        max_seq_length=FLAGS.max_seq_length)
                else:
                    raise ValueError("Invalid --flipping_mode flag value")

        # np.mean([]) emits a RuntimeWarning; report nan explicitly instead.
        nonzero_steps = [float(x) for x in steps_taken if x > 0]
        mean_steps = np.mean(nonzero_steps) if nonzero_steps else float("nan")
        tf.logging.info("steps = %.4f (%d failed, %d non-zero, %d zero)",
                        mean_steps, len(failures), len(nonzero_steps),
                        zero_step_instances)

        # measure consistency of final dataset - run a forward pass through
        # the entire final dataset and verify it satisfies the original
        # objective. This if the code runs correctly, total_inconsistent = 0
        tf.logging.info("Measuring consistency of final dataset")

        total_inconsistent = 0
        total_lossy = 0

        for i in range(0, len(output_sequences), FLAGS.batch_size):
            batch_elements = output_sequences[i:i + FLAGS.batch_size]
            feed_dict = {
                input_ids_placeholder:
                    np.array([x["input_ids"] for x in batch_elements]),
                bert_input_mask_placeholder:
                    np.array([x["bert_input_mask"] for x in batch_elements]),
                token_type_ids_placeholder:
                    np.array([x["token_type_ids"] for x in batch_elements]),
                prob_vector_placeholder:
                    np.array([x["prob_vector"] for x in batch_elements])
            }
            peo, pr = sess.run([per_eg_obj, probs], feed_dict=feed_dict)
            consistency_flags = evaluate_stopping(
                stopping_criteria=FLAGS.stopping_criteria,
                obj_prob_vector=np.array(
                    [x["prob_vector"] for x in batch_elements]),
                curr_prob_vector=pr,
                per_example_objective=peo)
            total_inconsistent += len(batch_elements) - np.sum(
                consistency_flags)

            # Next, apply a lossy perturbation to the input (conversion to a
            # string). This is often lossy since it eliminates impossible
            # sequences and incorrect tokenizations. We check how many
            # consistencies still hold true
            all_detok_strings = [
                em_util.ids_to_strings(elem["input_ids"], tokenizer)
                for elem in batch_elements
            ]

            all_ip_examples = []
            if num_input_sequences == 1:
                for ds, be in zip(all_detok_strings, batch_elements):
                    # np.asarray accepts both ndarray and plain-list vectors;
                    # the --prob_vector flag path stores a plain list, which
                    # has no .tolist() method.
                    prob_vector_labels = np.asarray(
                        be["prob_vector"]).tolist()
                    all_ip_examples.append(
                        rc.InputExample(text_a=ds[0],
                                        text_b=None,
                                        label=prob_vector_labels,
                                        guid=None))
            else:
                for ds, be in zip(all_detok_strings, batch_elements):
                    prob_vector_labels = np.asarray(
                        be["prob_vector"]).tolist()
                    all_ip_examples.append(
                        rc.InputExample(text_a=ds[0],
                                        text_b=ds[1],
                                        label=prob_vector_labels,
                                        guid=None))

            all_templates = [
                em_util.input_to_template(aie, label_list)
                for aie in all_ip_examples
            ]
            all_new_elements = []
            for ip_template, prob_vector in all_templates:
                (input_ids, input_mask, bert_input_mask,
                 token_type_ids) = em_util.template_to_ids(
                     template=ip_template,
                     config=bert_config,
                     tokenizer=tokenizer,
                     max_seq_length=FLAGS.max_seq_length)

                if len(input_ids) > FLAGS.max_seq_length:
                    input_ids = input_ids[:FLAGS.max_seq_length]
                    input_mask = input_mask[:FLAGS.max_seq_length]
                    bert_input_mask = bert_input_mask[:FLAGS.max_seq_length]
                    token_type_ids = token_type_ids[:FLAGS.max_seq_length]

                all_new_elements.append({
                    "input_ids": input_ids,
                    "input_mask": input_mask,
                    "bert_input_mask": bert_input_mask,
                    "token_type_ids": token_type_ids,
                    "prob_vector": prob_vector
                })

            feed_dict = {
                input_ids_placeholder:
                    np.array([x["input_ids"] for x in all_new_elements]),
                bert_input_mask_placeholder:
                    np.array([x["bert_input_mask"] for x in all_new_elements]),
                token_type_ids_placeholder:
                    np.array([x["token_type_ids"] for x in all_new_elements]),
                prob_vector_placeholder:
                    np.array([x["prob_vector"] for x in all_new_elements])
            }
            peo, pr = sess.run([per_eg_obj, probs], feed_dict=feed_dict)
            lossy_consistency_flags = evaluate_stopping(
                stopping_criteria=FLAGS.stopping_criteria,
                obj_prob_vector=np.array(
                    [x["prob_vector"] for x in all_new_elements]),
                curr_prob_vector=pr,
                per_example_objective=peo)

            total_lossy += len(all_new_elements) - np.sum(
                lossy_consistency_flags)

            # An element is kept only if it passes both the direct check and
            # the check after the string round trip.
            net_consistency_flags = np.logical_and(consistency_flags,
                                                   lossy_consistency_flags)

            for elem, ncf in zip(batch_elements, net_consistency_flags):
                if ncf:
                    consistent_output_sequences.append(elem)
                else:
                    failures.append(elem)

        tf.logging.info("Total inconsistent found = %d / %d",
                        total_inconsistent, len(output_sequences))
        tf.logging.info("Total lossy inconsistent found = %d / %d",
                        total_lossy, len(output_sequences))
        tf.logging.info("Total consistent outputs so far = %d / %d",
                        len(consistent_output_sequences),
                        len(custom_templates[start_index:end_index]))

        # Getting ready for next iteration of processing
        if iteration_number < 10:
            for elem in failures:
                elem["input_ids"] = [x for x in elem["original_input_ids"]]
                elem["stopped"] = False
                elem["steps_taken"] = 0
                elem["score"] = 0.0
            all_elements = failures

    # Whatever is still failing is emitted anyway, after the consistent ones.
    tf.logging.info("Giving up on %d instances!", len(failures))
    for elem in failures:
        consistent_output_sequences.append(elem)

    if FLAGS.output_file:
        final_output = []
        for op_num, elem in enumerate(consistent_output_sequences):
            detok_strings = em_util.ids_to_strings(elem["input_ids"],
                                                   tokenizer)

            if num_input_sequences == 1:
                final_output.append("%d\t%d\t%s" %
                                    (op_num, elem["ip_num"], detok_strings[0]))
            elif num_input_sequences == 2:
                final_output.append(
                    "%d\t%d\t%s\t%s" % (op_num, elem["ip_num"],
                                        detok_strings[0], detok_strings[1]))

        if num_input_sequences == 1:
            header = "index\toriginal_index\tsentence"
        elif num_input_sequences == 2:
            header = "index\toriginal_index\tsentence1\tsentence2"

        final_output = [header] + final_output

        with tf.gfile.Open(FLAGS.output_file, "w") as f:
            f.write("\n".join(final_output) + "\n")

    return
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """Build the `TPUEstimatorSpec` for the multiple-choice BERT model.

    Features are read as `read_size` (= `num_choices` + 1) numbered groups
    ("input_ids0", "input_ids1", ...), stacked along a new axis 1 and then
    flattened to `[-1, seq_length]` before being fed to BERT.

    Args:
        features: Mapping of feature name to tensor; must hold the numbered
            "input_ids*", "input_mask*", "segment_ids*" entries and "labels".
        labels: Unused.
        mode: A `tf_estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `contrib_tpu.TPUEstimatorSpec` for the requested mode.
    """
    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    num_choices = 2
    read_size = num_choices + 1
    group_indices = range(0, read_size)

    ids_per_group = [features["input_ids" + str(k)] for k in group_indices]
    mask_per_group = [features["input_mask" + str(k)] for k in group_indices]
    segments_per_group = [
        features["segment_ids" + str(k)] for k in group_indices
    ]

    # Only column 4 of the "labels" feature is used as the label id.
    label_ids = features["labels"][:, 4]

    seq_length = ids_per_group[0].shape[-1]
    input_ids = tf.reshape(tf.stack(ids_per_group, axis=1), [-1, seq_length])
    input_mask = tf.reshape(tf.stack(mask_per_group, axis=1),
                            [-1, seq_length])
    segment_ids = tf.reshape(tf.stack(segments_per_group, axis=1),
                             [-1, seq_length])

    is_training = (mode == tf_estimator.ModeKeys.TRAIN)

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # Both builders share the same call signature and return tuple.
    if FLAGS.bilin_preproc:
        build_head = model_builder.create_model_bilin
    else:
        build_head = model_builder.create_model
    total_loss, per_example_loss, logits, probabilities = build_head(
        model, label_ids, num_choices)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        checkpoint_mapping = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        assignment_map, initialized_variable_names = checkpoint_mapping
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # With use_tpu the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        restored = var.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if restored else ""
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    if mode == tf_estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                            loss=total_loss,
                                            train_op=train_op,
                                            scaffold_fn=scaffold_fn)

    if mode == tf_estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return accuracy of the argmax prediction and the mean loss."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(labels=label_ids,
                                                     predictions=predictions),
                "eval_loss": tf.metrics.mean(values=per_example_loss),
            }

        metric_inputs = [per_example_loss, label_ids, logits]
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs),
            scaffold_fn=scaffold_fn)

    return contrib_tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the BERT NER model with CRF output.

    Args:
        features: Mapping of feature name to tensor; must hold "input_ids",
            "input_mask", "segment_ids" and "label_ids".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` holding the train op (TRAIN), the
        precision/recall/F1 metrics (EVAL) or `pred_ids` as predictions
        (any other mode).
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    print('shape of input_ids', input_ids.shape)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model: input_ids is the id representation of the input
    # samples, label_ids the id representation of the labels.
    (total_loss, logits, trans, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    print("total_loss=", total_loss)
    print("shape of pred_ids", pred_ids.shape)
    print(trans)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Initialise the variables from the pre-trained BERT checkpoint.
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # The restore is registered exactly once: inside the scaffold when
        # use_tpu is set, directly otherwise. (An extra unconditional call
        # that used to precede this branch was removed as redundant.)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    tf.logging.info("**** Trainable Variables ****")

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Metric computation adapted for NER.
        def metric_fn(label_ids, pred_ids):
            """Return precision, recall and F1 for label ids 2..7."""
            # pred_ids is received as an argument instead of being captured
            # from the enclosing scope, so the metrics depend only on the
            # tensors listed in eval_metrics.
            weight = tf.sequence_mask(FLAGS.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                             [2, 3, 4, 5, 6, 7], weight)
            recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                       [2, 3, 4, 5, 6, 7], weight)
            f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                              [2, 3, 4, 5, 6, 7], weight)

            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, pred_ids])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn
        )
    return output_spec
def __init__(self, config):
    """Build the BERT + BiLSTM tagging graph, its loss, train op and saver.

    Args:
        config: Mapping read with the keys "lr", "lstm_dim", "num_tags" and
            "optimizer".

    Raises:
        KeyError: If `config["optimizer"]` is anything other than "adam".
    """
    self.config = config
    self.lr = config["lr"]
    self.lstm_dim = config["lstm_dim"]
    self.num_tags = config["num_tags"]

    self.global_step = tf.Variable(0, trainable=False)
    self.best_dev_f1 = tf.Variable(0.0, trainable=False)
    self.best_test_f1 = tf.Variable(0.0, trainable=False)
    self.initializer = initializers.xavier_initializer()

    # add placeholders for the model
    self.input_ids = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                    name="input_ids")
    self.input_mask = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                     name="input_mask")
    self.segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                      name="segment_ids")
    self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                  name="Targets")
    # dropout keep prob
    self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")

    # Sequence lengths are derived by counting the non-zero input ids per row.
    used = tf.sign(tf.abs(self.input_ids))
    length = tf.reduce_sum(used, reduction_indices=1)
    self.lengths = tf.cast(length, tf.int32)
    self.batch_size = tf.shape(self.input_ids)[0]
    self.num_steps = tf.shape(self.input_ids)[-1]

    # embeddings for chinese character and segmentation representation
    embedding = self.bert_embedding()

    # apply dropout before feed to lstm layer
    lstm_inputs = tf.nn.dropout(embedding, self.dropout)

    # bi-directional lstm layer
    lstm_outputs = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)

    # logits for tags
    self.logits = self.project_layer(lstm_outputs)

    # loss of the model
    self.loss = self.loss_layer(self.logits, self.lengths)

    # Where the BERT parameters are initialised from.
    # NOTE(review): machine-specific absolute path; consider making this
    # configurable.
    init_checkpoint = "/home/ubuntu/zzp/bertNER/pretrain/new_bert/model.ckpt-400000"
    # Collect every trainable variable of the model.
    tvars = tf.trainable_variables()
    # Load the BERT weights.
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    print("**** Trainable Variables ****")
    # Print each variable, flagging the ones restored from the checkpoint.
    train_vars = []
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        train_vars.append(var)
        # print() does not interpolate extra positional arguments the way a
        # logging call does, so the message is formatted explicitly.
        print(" name = %s, shape = %s%s" % (var.name, var.shape, init_string))

    with tf.variable_scope("optimizer"):
        optimizer = self.config["optimizer"]
        if optimizer == "adam":
            self.opt = tf.train.AdamOptimizer(self.lr)
        else:
            raise KeyError(optimizer)

        # Clip by global norm before applying the gradients.
        grads = tf.gradients(self.loss, train_vars)
        (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

        self.train_op = self.opt.apply_gradients(
            zip(grads, train_vars), global_step=self.global_step)

    # saver of the model
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the BERT NER tagger (macro metrics).

    Args:
        features: Mapping of feature name to tensor; must hold "input_ids",
            "input_mask", "segment_ids" and "label_ids".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` holding the train op (TRAIN), the
        macro precision/recall/F1 metrics (EVAL) or `predicts` as predictions
        (any other mode).
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    # Defaults to empty so the logging loop below also works when no
    # checkpoint is given (it was previously unbound in that case).
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # The restore is registered exactly once: inside the scaffold when
        # use_tpu is set, directly otherwise. (An extra unconditional call
        # that used to precede this branch was removed as redundant.)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(per_example_loss, label_ids, logits):
            """Return macro precision, recall and F1 for label ids 2..7."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # NOTE(review): the class count is hard-coded to 11 here rather
            # than taken from num_labels - confirm the two agree.
            precision = tf_metrics.precision(label_ids, predictions, 11,
                                             [2, 3, 4, 5, 6, 7],
                                             average="macro")
            recall = tf_metrics.recall(label_ids, predictions, 11,
                                       [2, 3, 4, 5, 6, 7], average="macro")
            f = tf_metrics.f1(label_ids, predictions, 11, [2, 3, 4, 5, 6, 7],
                              average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the BERT NER model with CRF output.

    Args:
        features: Mapping of feature name to tensor; must hold "input_ids",
            "input_mask", "segment_ids" and "label_ids".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` holding the train op (TRAIN), the
        precision/recall/F1 metrics (EVAL) or `pred_ids` as predictions
        (any other mode).
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    print('shape of input_ids', input_ids.shape)
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model: input_ids is the id representation of the input
    # samples, label_ids the id representation of the labels.
    (total_loss, logits, trans, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    # Defaults to empty so the logging loop below also works when no
    # checkpoint is given (it was previously unbound in that case).
    initialized_variable_names = {}
    scaffold_fn = None
    # Initialise the variables from the pre-trained BERT checkpoint.
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # The restore is registered exactly once: inside the scaffold when
        # use_tpu is set, directly otherwise. (An extra unconditional call
        # that used to precede this branch was removed as redundant.)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    # Log each variable, flagging the ones restored from the checkpoint.
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Metric computation adapted for NER.
        def metric_fn(label_ids, pred_ids):
            """Return precision, recall and F1 for label ids 2..7."""
            # pred_ids is received as an argument instead of being captured
            # from the enclosing scope, so the metrics depend only on the
            # tensors listed in eval_metrics.
            weight = tf.sequence_mask(FLAGS.max_seq_length)
            precision = tf_metrics.precision(label_ids, pred_ids, num_labels,
                                             [2, 3, 4, 5, 6, 7], weight)
            recall = tf_metrics.recall(label_ids, pred_ids, num_labels,
                                       [2, 3, 4, 5, 6, 7], weight)
            f = tf_metrics.f1(label_ids, pred_ids, num_labels,
                              [2, 3, 4, 5, 6, 7], weight)

            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, pred_ids])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn
        )
    return output_spec
def model_fn(features, labels, mode, params):
    """Build the `TPUEstimatorSpec` for the tagger with extra embedding features.

    Args:
        features: Mapping of feature name to tensor; must hold "input_ids",
            "input_mask", "segment_ids", "label_ids", "pos_embedding" and
            "dp_embedding".
        labels: Unused.
        mode: A `tf.estimator.ModeKeys` value.
        params: Unused.

    Returns:
        A `tf.contrib.tpu.TPUEstimatorSpec` holding the train op (TRAIN), the
        macro precision/recall/F1 metrics (EVAL) or `predicts` as predictions
        (any other mode).
    """
    tf.logging.info("*** Features ***")  # logging is used to record progress
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    # Additional embedding inputs passed straight through to create_model.
    # NOTE(review): the original comments called both of these "position
    # vectors" - confirm what dp_embedding actually encodes.
    pos_embedding = features["pos_embedding"]
    dp_embedding = features["dp_embedding"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Build the model through the BERT interface.
    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, pos_embedding, dp_embedding, num_labels,
        use_one_hot_embeddings)

    tvars = tf.trainable_variables()  # every variable that will be trained
    # Defaults to empty so the logging loop below also works when no
    # checkpoint is given (it was previously unbound in that case).
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        # Map the model variables onto the pre-trained BERT checkpoint.
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        # The restore is registered exactly once: inside the scaffold when
        # use_tpu is set, directly otherwise. (An extra unconditional call
        # that used to precede this branch was removed as redundant.)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        # Flag the variables that are restored from the checkpoint.
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                 num_train_steps,
                                                 num_warmup_steps, use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, predicts, valid_labels):
            """Return macro precision, recall and F1 over `valid_labels`."""
            precision = tf_metrics.precision(label_ids, predicts, num_labels,
                                             valid_labels, average="macro")
            recall = tf_metrics.recall(label_ids, predicts, num_labels,
                                       valid_labels, average="macro")
            f = tf_metrics.f1(label_ids, predicts, num_labels, valid_labels,
                              average="macro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        # valid_labels is not defined in this function; it is resolved from
        # the enclosing scope.
        eval_metrics = (metric_fn, [label_ids, predicts, valid_labels])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
    return output_spec
def model_fn(features, labels, mode, params):
    """Build a prediction-only `TPUEstimatorSpec` for the span model.

    The model is always created with `is_training=False`. Besides the logits,
    the returned predictions echo the input features and the gold
    start/end/answer-type features.

    Args:
        features: Mapping of feature name to tensor; must hold "unique_ids",
            "input_ids", "input_mask", "segment_ids", "start_positions",
            "end_positions" and "answer_types".
        labels: Unused (deleted on entry).
        mode: Passed through to the returned spec.
        params: Unused (deleted on entry).

    Returns:
        A `tf_estimator.tpu.TPUEstimatorSpec` carrying the predictions dict.
    """
    del labels, params

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    model_outputs = create_model(
        bert_config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        span_encoding=span_encoding,
        max_answer_length=max_answer_length,
        use_one_hot_embeddings=use_one_hot_embeddings)
    start_logits, end_logits, answer_type_logits = model_outputs

    scaffold_fn = None
    tvars = tf.trainable_variables()
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # With use_tpu the restore is deferred to the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    predictions = {
        "unique_ids": tf.identity(unique_ids),
        "start_logits": start_logits,
        "end_logits": end_logits,
        "answer_type_logits": answer_type_logits,
        # Input features need to be present in tf.Example output.
        "input_ids": tf.identity(input_ids),
        "input_mask": tf.identity(input_mask),
        "segment_ids": tf.identity(segment_ids),
        "start_positions": tf.identity(features["start_positions"]),
        "end_positions": tf.identity(features["end_positions"]),
        "answer_types": tf.identity(features["answer_types"]),
    }

    return tf_estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for Estimator.

    Builds the classifier through `create_model`, optionally initialises
    variables from `init_checkpoint`, and returns an `EstimatorSpec` for the
    requested mode:

    * TRAIN: loss plus a train op from `optimization.create_optimizer`.
    * EVAL: loss plus the metric ops collected in `eval_dict` (single-label
      or multi-label, selected by `multilabel`).
    * anything else: predictions with "output_layer", "logits" and
      "probabilities".

    Names that are not parameters (`bert_config`, `num_labels`, `multilabel`,
    `init_checkpoint`, `learning_rate`, `idx2emotion`, `sentiment_groups`,
    `intensity_groups`, ...) are free variables from the enclosing scope.

    Args:
      features: dict of tensors; read keys are "input_ids", "input_mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.estimator.EstimatorSpec`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, per_example_loss, output_layer, logits,
     probabilities) = create_model(
         bert_config, is_training, input_ids, input_mask, segment_ids,
         label_ids, num_labels, multilabel, sent_rels, sentiment,
         entailment_rels, entailment, corr_rels, correlation)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint, FLAGS.transfer_learning)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Initialized Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # When freezing is requested, every variable whose name contains
        # "bert" is selected by the predicate handed to the optimizer.
        freeze_layer_fn = (None if not FLAGS.freeze_layers
                           else lambda x: "bert" in x)
        train_op = optimization.create_optimizer(
            total_loss,
            learning_rate,
            num_train_steps,
            num_warmup_steps,
            use_tpu=False,
            freeze_layer_fn=freeze_layer_fn)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
        # All helpers below write their (value, update_op) pairs into this
        # dictionary, which becomes `eval_metric_ops`.
        eval_dict = {}

        def metric_fn_single(per_example_loss, label_ids, logits):
            """Compute accuracy for the single-label case."""
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # Get ids from one hot labels.
            true_labels = tf.argmax(label_ids, axis=-1, output_type=tf.int32)
            accuracy = tf.metrics.accuracy(
                labels=true_labels, predictions=predictions)
            loss = tf.metrics.mean(values=per_example_loss)
            eval_dict["eval_accuracy"] = accuracy
            eval_dict["eval_loss"] = loss

        def get_f1(precision, recall):
            """Calculate F1 score based on precision and recall."""
            # Each argument is a (value, update_op) pair; the small constant
            # keeps the denominator non-zero.
            return (2 * precision[0] * recall[0] /
                    (precision[0] + recall[0] + 1e-5),
                    tf.group(precision[1], recall[1]))

        def get_threshold_based_scores(y_true, y_pred):
            """Compute precision, recall and F1 at thresholds."""
            thresholds = [float(v) for v in FLAGS.eval_thresholds.split(",")]
            (prec_t, prec_t_op) = tf.metrics.precision_at_thresholds(
                y_true, y_pred, thresholds=thresholds)
            (rec_t, rec_t_op) = tf.metrics.recall_at_thresholds(
                y_true, y_pred, thresholds=thresholds)
            for i, v in enumerate(thresholds):
                eval_dict["precision_at_threshold_%.2f" % v] = (prec_t[i],
                                                                prec_t_op)
                eval_dict["recall_at_threshold_%.2f" % v] = (rec_t[i],
                                                             rec_t_op)
                eval_dict["F1_at_threshold_%.2f" % v] = get_f1(
                    (prec_t[i], prec_t_op), (rec_t[i], rec_t_op))

        def get_relation_based_scores(y_true, y_pred, relations, name):
            """Measure performance based on label relations."""

            def expand_labels(labels):
                """Expand the set of labels based on label relations."""

                def check_relations(rels):
                    """Check whether a relation applies to a label set."""
                    is_in_category = tf.reduce_any((labels + rels) > 1)
                    return tf.cond(is_in_category,
                                   lambda: labels + rels,
                                   lambda: labels)

                new_labels = tf.reduce_sum(
                    tf.map_fn(check_relations, relations), axis=0)
                return tf.cast(new_labels >= 1, tf.int64)

            pred = tf.map_fn(expand_labels, y_pred)
            true = tf.map_fn(expand_labels, y_true)
            precision = tf.metrics.precision(true, pred)
            recall = tf.metrics.recall(true, pred)
            eval_dict[name + "_precision"] = precision
            eval_dict[name + "_recall"] = recall
            eval_dict[name + "_f1"] = get_f1(precision, recall)
            eval_dict[name + "_accuracy"] = tf.metrics.accuracy(true, pred)

        def metric_fn_multi(per_example_loss, label_ids, probabilities):
            """Compute class-level accuracies for the multi-label case."""
            label_ids = tf.cast(label_ids, tf.int64)
            probs_split = tf.split(probabilities, num_labels, axis=-1)
            label_ids_split = tf.split(label_ids, num_labels, axis=-1)
            pred_ind = tf.cast(probabilities >= FLAGS.eval_prob_threshold,
                               tf.int64)
            pred_ind_split = tf.split(pred_ind, num_labels, axis=-1)
            weights = tf.reduce_sum(label_ids, axis=0)

            eval_dict["per_example_eval_loss"] = tf.metrics.mean(
                values=per_example_loss)

            # Calculate accuracy, precision and recall.
            get_threshold_based_scores(label_ids, probabilities)

            # Calculate values at the emotion level.
            auc_vals = []
            accuracies = []
            for j, class_probs in enumerate(probs_split):
                current_auc, update_op_auc = tf.metrics.auc(
                    label_ids_split[j], class_probs)
                eval_dict[idx2emotion[j] + "_auc"] = (current_auc,
                                                      update_op_auc)
                current_acc, update_op_acc = tf.metrics.accuracy(
                    label_ids_split[j], pred_ind_split[j])
                eval_dict[idx2emotion[j] + "_accuracy"] = (current_acc,
                                                           update_op_acc)
                eval_dict[idx2emotion[j] + "_precision"] = (
                    tf.metrics.precision(label_ids_split[j],
                                         pred_ind_split[j]))
                eval_dict[idx2emotion[j] + "_recall"] = tf.metrics.recall(
                    label_ids_split[j], pred_ind_split[j])
                auc_vals.append(current_auc)
                # The per-class accuracy (not the AUC) feeds the aggregate
                # "accuracy" / "accuracy_weighted" metrics below.
                accuracies.append(current_acc)

            auc_vals = tf.convert_to_tensor(auc_vals, dtype=tf.float32)
            accuracies = tf.convert_to_tensor(accuracies, dtype=tf.float32)
            eval_dict["auc"] = tf.metrics.mean(values=auc_vals)
            eval_dict["auc_weighted"] = tf.metrics.mean(
                values=auc_vals, weights=weights)
            eval_dict["accuracy"] = tf.metrics.mean(values=accuracies)
            eval_dict["accuracy_weighted"] = tf.metrics.mean(
                values=accuracies, weights=weights)

            # Calculate sentiment-based performance.
            get_relation_based_scores(
                label_ids, pred_ind,
                tf.constant(sentiment_groups, dtype=tf.int64), "sentiment")

            # Calculate emotion-intensity based performance.
            get_relation_based_scores(
                label_ids, pred_ind,
                tf.constant(intensity_groups, dtype=tf.int64),
                "emotion_intensity")

        if multilabel:
            metric_fn_multi(per_example_loss, label_ids, probabilities)
        else:
            metric_fn_single(per_example_loss, label_ids, logits)

        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_dict,
            scaffold=scaffold_fn)

    else:
        print("mode:", mode, "probabilities:", probabilities)
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={"output_layer": output_layer,
                         "logits": logits,
                         "probabilities": probabilities},
            scaffold=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):
    """Estimator `model_fn` for the sequence-labelling (NER) model.

    Builds the model through `create_model`, optionally initialises variables
    from `init_checkpoint`, and returns a spec for the requested mode:

    * TRAIN: `tf.estimator.EstimatorSpec` with a loss-logging hook.
    * EVAL: `TPUEstimatorSpec` with micro-averaged precision/recall/F1.
    * anything else: `TPUEstimatorSpec` whose predictions are `pred_ids`.

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu`, `use_one_hot_embeddings`, `FLAGS`) are
    free variables from the enclosing scope.

    Args:
      features: dict of tensors; read keys are "input_ids", "input_mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      The estimator spec for `mode`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # Feed the batch through the model. The second and third return values
    # are not used by this function.
    (total_loss, _, _, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings, FLAGS.dropout_rate,
        FLAGS.lstm_size, FLAGS.cell, FLAGS.num_layers)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Start empty so the logging loop below also works when no checkpoint
    # is supplied.
    initialized_variable_names = {}

    # Load the pre-trained BERT weights.
    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        tf.summary.scalar('loss', total_loss)

        # Modified for NER: periodically log the loss and the global step.
        hook_dict = {
            'loss': total_loss,
            'global_steps': tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            hook_dict, every_n_iter=FLAGS.save_summary_steps)

        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])

    elif mode == tf.estimator.ModeKeys.EVAL:
        # Modified for NER: token-level metrics instead of accuracy.
        def metric_fn(label_ids, pred_ids, num_labels):
            """Return micro-averaged precision, recall and F1 metric ops."""
            # Only label ids in [2, num_labels - 3) are scored.
            # NOTE(review): presumably the excluded ids are padding/special
            # tags - confirm against the label map.
            pos_indices = list(range(2, num_labels - 3))
            precision = tf_metrics.precision(
                label_ids, pred_ids, num_labels, pos_indices,
                average="micro")
            recall = tf_metrics.recall(
                label_ids, pred_ids, num_labels, pos_indices,
                average="micro")
            f = tf_metrics.f1(
                label_ids, pred_ids, num_labels, pos_indices,
                average="micro")
            return {
                "eval_precision": precision,
                "eval_recall": recall,
                "eval_f": f,
            }

        eval_metrics = (metric_fn, [label_ids, pred_ids, num_labels])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)

    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=pred_ids)

    return output_spec
def model_fn(features, labels, mode, params):
    """TPUEstimator `model_fn` for the sequence-labelling (NER) model.

    Builds the model through `create_model`, restores `init_checkpoint` only
    when training, and returns a `TPUEstimatorSpec`:

    * PREDICT: predictions are `pred_ids`.
    * TRAIN: Adam with a staircase exponentially-decayed learning rate and
      global-norm gradient clipping (or plain Adam when
      `FLAGS.use_feature_based` is set), plus a loss-logging hook.
    * otherwise (EVAL): precision/recall/F1/accuracy/loss metric ops.

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `use_tpu`,
    `use_one_hot_embeddings`, `FLAGS`) are free variables from the enclosing
    scope.

    Args:
      features: dict of tensors; read keys are "input_ids", "input_mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`.
    """
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info(" name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]
    print('shape of input_ids', input_ids.shape)
    print('shape of label_ids', label_ids.shape)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # The third and fourth return values are not used by this function.
    (total_loss, per_example_loss, _, _, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)
    print('shape of pred_ids', pred_ids.shape)

    global_step = tf.train.get_or_create_global_step()

    # Add summary.
    tf.summary.scalar('loss', total_loss)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Start empty so the logging loop below also works when nothing is
    # restored (evaluation/prediction, or no checkpoint given).
    initialized_variable_names = {}

    if init_checkpoint and is_training:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # Register the restore exactly once: inside the scaffold on TPU,
        # directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                        init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.PREDICT:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions=pred_ids,
            scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.TRAIN:
        # Decay the learning rate by 0.9 every 5000 steps and clip the
        # global gradient norm at 1.5.
        lr = tf.train.exponential_decay(
            learning_rate, global_step, 5000, 0.9, staircase=True)
        optimizer = tf.train.AdamOptimizer(lr)
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(total_loss, tvars), 1.5)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=global_step)

        # NOTE: the first optimizer is intentionally still built when the
        # override below applies; skipping it would change the names of the
        # optimizer slot variables stored in existing checkpoints.
        if FLAGS.use_feature_based:
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(
                total_loss, global_step=global_step)

        logging_hook = tf.train.LoggingTensorHook(
            {"batch_loss": total_loss}, every_n_iter=10)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook],
            scaffold_fn=scaffold_fn)

    else:  # mode == tf.estimator.ModeKeys.EVAL
        def metric_fn(label_ids, pred_ids, per_example_loss, input_mask):
            """Return the evaluation metric ops, weighted by `input_mask`."""
            # ['<pad>'] + ["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC",
            #              "I-LOC", "B-MISC", "I-MISC", "X"]
            indices = [2, 3, 4, 5, 6, 7, 8, 9]
            precision = tf_metrics.precision(
                label_ids, pred_ids, num_labels, indices, input_mask)
            recall = tf_metrics.recall(
                label_ids, pred_ids, num_labels, indices, input_mask)
            f = tf_metrics.f1(
                label_ids, pred_ids, num_labels, indices, input_mask)
            accuracy = tf.metrics.accuracy(label_ids, pred_ids, input_mask)
            loss = tf.metrics.mean(per_example_loss)
            return {
                'eval_precision': precision,
                'eval_recall': recall,
                'eval_f': f,
                'eval_accuracy': accuracy,
                'eval_loss': loss,
            }

        eval_metrics = (metric_fn,
                        [label_ids, pred_ids, per_example_loss, input_mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):
    """TPUEstimator `model_fn` reporting a streaming confusion matrix.

    Builds the model through `create_model`, optionally restores
    `init_checkpoint`, and returns a `TPUEstimatorSpec`:

    * TRAIN: loss plus a train op from `optimization.create_optimizer`.
    * EVAL: loss plus a "confusion_matrix" metric weighted by `input_mask`.
    * anything else: predictions are `pred_ids`.

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu`, `use_one_hot_embeddings`) are free
    variables from the enclosing scope.

    Args:
      features: dict of tensors; read keys are "input_ids", "input_mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = {}, shape = {}".format(
            name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, pred_ids) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Start empty so the logging loop below also works when no checkpoint
    # is supplied.
    initialized_variable_names = {}

    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # Register the restore exactly once: inside the scaffold on TPU,
        # directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info(" name = {}, shape = {} {}".format(
            var.name, var.shape, init_string))

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:
        # Modified for NER: report a confusion matrix instead of accuracy.
        def metric_fn(label_ids, pred_ids, input_mask):
            """Return the confusion-matrix metric, or `{}` if it fails."""
            try:
                cm = tf_metrics.streaming_confusion_matrix(
                    label_ids, pred_ids, num_labels, weights=input_mask)
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: evaluation still reports the loss when the
                # metric cannot be built.
                logging.error(str(e))
                return {}
            return {"confusion_matrix": cm}

        # `input_mask` is handed over through the tensor list rather than
        # captured from the enclosing scope, like the other metric inputs.
        eval_metrics = (metric_fn, [label_ids, pred_ids, input_mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)

    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=pred_ids, scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Creates the classifier via `create_model`, optionally restores
    `init_checkpoint`, and returns the `TPUEstimatorSpec` matching `mode`
    (train op, accuracy/loss metrics, or probability predictions).

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu`, `use_one_hot_embeddings`) are free
    variables from the enclosing scope.
    """
    tf.logging.info("*** Features ***")
    for key in sorted(features):
        tf.logging.info(" name = %s, shape = %s" % (key, features[key].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    model_outputs = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)
    total_loss, per_example_loss, logits, probabilities = model_outputs

    trainable_vars = tf.trainable_variables()
    scaffold_fn = None
    if not init_checkpoint:
        initialized_variable_names = []
    else:
        assignment_map, initialized_variable_names = (
            modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the restore is deferred into the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        restored = variable.name in initialized_variable_names
        suffix = ", *INIT_FROM_CKPT*" if restored else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(example_losses, gold_ids, class_logits):
            """Return accuracy and mean-loss metric ops."""
            predicted_ids = tf.argmax(
                class_logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(gold_ids, predicted_ids),
                "eval_loss": tf.metrics.mean(example_losses),
            }

        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, [per_example_loss, label_ids, logits]),
            scaffold_fn=scaffold_fn)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return contrib_tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)

    # Any other mode value: hand back the raw probabilities tensor.
    return contrib_tpu.TPUEstimatorSpec(
        mode=mode, predictions=probabilities, scaffold_fn=scaffold_fn)
def __init__(self, bert_config, num_labels, seq_length, init_checkpoint):
    """Build the multi-GPU training graph.

    Creates the input placeholders, a warm-up + linear-decay learning rate,
    one BERT tower per GPU (each fed a `gpu_step`-sized slice of the batch),
    and a train op that applies the tower gradients combined by
    `self.average_gradients`.

    Names that are not parameters (`gpu_nums`, `init_lr`, `num_train_steps`,
    `num_warmup_steps`) are read from the enclosing/module scope.

    Args:
      bert_config: configuration forwarded to `modeling.BertModel`.
      num_labels: width of the classification layer.
        NOTE(review): the weighted loss below indexes classes 0..4, so this
        presumably has to be 5 - confirm.
      seq_length: second dimension of the id/mask placeholders.
      init_checkpoint: checkpoint to initialise from; skipped when falsy.
    """
    self.bert_config = bert_config
    self.num_labels = num_labels
    self.seq_length = seq_length
    self.tower_grads = []
    self.losses = []

    self.input_ids = tf.placeholder(
        tf.int32, [None, self.seq_length], name='input_ids')
    self.input_mask = tf.placeholder(
        tf.int32, [None, self.seq_length], name='input_mask')
    self.segment_ids = tf.placeholder(
        tf.int32, [None, self.seq_length], name='segment_ids')
    self.labels = tf.placeholder(tf.int32, [None], name='labels')
    self.batch_size = tf.placeholder(tf.int32, shape=[], name='batch_size')
    self.is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
    print(self.batch_size)

    # Number of batch rows handled by each GPU.
    self.gpu_step = self.batch_size // gpu_nums

    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

    # Implements linear decay of the learning rate.
    learning_rate = tf.train.polynomial_decay(
        learning_rate,
        global_step,
        num_train_steps,
        end_learning_rate=0.0,
        power=1.0,
        cycle=False)

    if num_warmup_steps:
        # While global_step < num_warmup_steps the rate is
        # (global_step / num_warmup_steps) * init_lr.
        global_steps_int = tf.cast(global_step, tf.int32)
        warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
        global_steps_float = tf.cast(global_steps_int, tf.float32)
        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)
        warmup_percent_done = global_steps_float / warmup_steps_float
        warmup_learning_rate = init_lr * warmup_percent_done
        is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
        learning_rate = ((1.0 - is_warmup) * learning_rate +
                         is_warmup * warmup_learning_rate)

    optimizer = optimization.AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    with tf.variable_scope(tf.get_variable_scope()) as outer_scope:
        pred = []
        label = []
        for d in range(gpu_nums):
            with tf.device("/gpu:%s" % d), \
                    tf.name_scope("%s_%s" % ("tower", d)):
                # Rows [d * gpu_step, (d + 1) * gpu_step) belong to tower d.
                lo = d * self.gpu_step
                hi = (d + 1) * self.gpu_step
                self.model = modeling.BertModel(
                    config=self.bert_config,
                    is_training=self.is_training,
                    input_ids=self.input_ids[lo:hi],
                    input_mask=self.input_mask[lo:hi],
                    token_type_ids=self.segment_ids[lo:hi])
                print("GPU:", d)

                tvars = tf.trainable_variables()
                initialized_variable_names = {}
                if init_checkpoint:
                    (assignment_map, initialized_variable_names
                    ) = modeling.get_assignment_map_from_checkpoint(
                        tvars, init_checkpoint)
                    tf.train.init_from_checkpoint(
                        init_checkpoint, assignment_map)

                logging.info("**** Trainable Variables ****")
                for var in tvars:
                    init_string = ""
                    if var.name in initialized_variable_names:
                        init_string = ", *INIT_FROM_CKPT*"
                    logging.info(" name = %s, shape = %s%s", var.name,
                                 var.shape, init_string)

                output_layer = self.model.get_pooled_output()
                logging.info(output_layer)

                # `self.is_training` is a placeholder, so a Python `if` on it
                # cannot follow the value fed at run time. Let the dropout
                # layer switch on the tensor instead (rate 0.1 == keep 0.9).
                output_layer = tf.layers.dropout(
                    output_layer, rate=0.1, training=self.is_training)

                # Even rows and odd rows of the tower batch are paired up
                # and concatenated feature-wise.
                match_1 = tf.strided_slice(
                    output_layer, [0], [self.gpu_step], [2])
                match_2 = tf.strided_slice(
                    output_layer, [1], [self.gpu_step], [2])
                match = tf.concat([match_1, match_2], 1)

                self.logits = tf.layers.dense(
                    match, self.num_labels, name='fc', reuse=tf.AUTO_REUSE)

                # Predicted labels.
                self.y_pred_cls = tf.argmax(
                    tf.nn.softmax(self.logits), 1, name="pred")
                logging.info(self.y_pred_cls)

                # True labels: one per pair, taken from the even rows.
                self.r_labels = tf.strided_slice(
                    self.labels[lo:hi], [0], [self.gpu_step], [2])
                logging.info(self.r_labels)

                one_hot_labels = tf.one_hot(
                    self.r_labels, depth=self.num_labels, dtype=tf.float32)
                log_probs = tf.nn.log_softmax(self.logits, axis=-1)
                # Class-weighted cross entropy with fixed per-class weights
                # (30, 9, 2, 2, 9).
                per_example_loss = \
                    - (30 * one_hot_labels[:, 0] * log_probs[:, 0]) \
                    - (9 * one_hot_labels[:, 1] * log_probs[:, 1]) \
                    - (2 * one_hot_labels[:, 2] * log_probs[:, 2]) \
                    - (2 * one_hot_labels[:, 3] * log_probs[:, 3]) \
                    - (9 * one_hot_labels[:, 4] * log_probs[:, 4]) \
                    + 1e-10
                self.loss = tf.reduce_mean(per_example_loss)

                tvars = tf.trainable_variables()
                grads = tf.gradients(self.loss, tvars)
                (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
                self.tower_grads.append(list(zip(grads, tvars)))
                self.losses.append(self.loss)
                label.append(self.r_labels)
                pred.append(self.y_pred_cls)

                # Later towers share the variables created by the first one.
                outer_scope.reuse_variables()

    with tf.name_scope("apply_gradients"), tf.device("/cpu:0"):
        gradients = self.average_gradients(self.tower_grads)
        train_op = optimizer.apply_gradients(
            gradients, global_step=global_step)
        # The global step is advanced explicitly alongside the update.
        new_global_step = global_step + 1
        self.train_op = tf.group(
            train_op, [global_step.assign(new_global_step)])
        self.losses = tf.reduce_mean(self.losses)
        self.pred = tf.concat(pred, 0)
        self.label = tf.concat(label, 0)
        logging.info(self.pred)
        logging.info(self.label)
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Supports TRAIN (mean of the start, end and answer-type cross-entropy
    losses) and PREDICT (unique ids plus the three logit tensors). Any other
    mode raises `ValueError`.

    Names that are not parameters (`bert_config`, `init_checkpoint`,
    `learning_rate`, `num_train_steps`, `num_warmup_steps`, `use_tpu`,
    `use_one_hot_embeddings`) are free variables from the enclosing scope.
    """
    logging.info("*** Features ***")
    for key in sorted(features):
        logging.info(" name = %s, shape = %s", key, features[key].shape)

    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    start_logits, end_logits, answer_type_logits = create_model(
        bert_config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, initialized_variable_names = (
            bert_modeling.get_assignment_map_from_checkpoint(
                trainable_vars, init_checkpoint))
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:
            # On TPU the restore is deferred into the scaffold function.
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        restored = variable.name in initialized_variable_names
        suffix = ", *INIT_FROM_CKPT*" if restored else ""
        logging.info(" name = %s, shape = %s%s", variable.name,
                     variable.shape, suffix)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf_contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={
                "unique_ids": unique_ids,
                "start_logits": start_logits,
                "end_logits": end_logits,
                "answer_type_logits": answer_type_logits,
            },
            scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.TRAIN:
        raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                         (mode))

    seq_length = bert_modeling.get_shape_list(input_ids)[1]

    def cross_entropy(logits, targets, depth):
        """Mean negative log-likelihood of `targets` under `logits`."""
        one_hot_targets = tf.one_hot(targets, depth=depth, dtype=tf.float32)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        per_example = tf.reduce_sum(one_hot_targets * log_probs, axis=-1)
        return -tf.reduce_mean(per_example)

    start_positions = features["start_positions"]
    end_positions = features["end_positions"]
    answer_types = features["answer_types"]

    # Position losses range over the sequence; the label loss ranges over
    # the answer-type enum.
    start_loss = cross_entropy(start_logits, start_positions, seq_length)
    end_loss = cross_entropy(end_logits, end_positions, seq_length)
    answer_type_loss = cross_entropy(
        answer_type_logits, answer_types, len(data.AnswerType))

    total_loss = (start_loss + end_loss + answer_type_loss) / 3.0

    train_op = bert_optimization.create_optimizer(
        total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

    return tf_contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator.

    Creates the classifier via `create_model`, optionally initialises it from
    `init_checkpoint`, and returns the `TPUEstimatorSpec` for `mode`: a train
    op with a logging hook, weighted accuracy/loss metrics, or a predictions
    dict with "probabilities" and "predictions".

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_one_hot_embeddings`) are free variables from the
    enclosing scope.
    """
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    # Examples flagged as not real get zero weight in the eval metrics; when
    # the flag is absent every example counts fully.
    if "is_real_example" not in features:
        is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
    else:
        is_real_example = tf.cast(
            features["is_real_example"], dtype=tf.float32)

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    model_outputs = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids,
        label_ids, num_labels, use_one_hot_embeddings)
    total_loss, per_example_loss, logits, probabilities, predicts = (
        model_outputs)

    trainable_vars = tf.trainable_variables()
    if init_checkpoint:
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            loss=total_loss,
            init_lr=learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=None)
        logged_tensors = {
            "loss": total_loss,
            "global_steps": tf.train.get_or_create_global_step(),
        }
        logging_hook = tf.train.LoggingTensorHook(
            logged_tensors, every_n_iter=100)
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            training_hooks=[logging_hook])

    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(example_losses, gold_ids, class_logits, example_weights):
            """Return weighted accuracy and mean-loss metric ops."""
            predicted_ids = tf.argmax(
                class_logits, axis=-1, output_type=tf.int32)
            return {
                "eval_accuracy": tf.metrics.accuracy(
                    labels=gold_ids,
                    predictions=predicted_ids,
                    weights=example_weights),
                "eval_loss": tf.metrics.mean(
                    values=example_losses, weights=example_weights),
            }

        metric_inputs = [per_example_loss, label_ids, logits, is_real_example]
        return tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=(metric_fn, metric_inputs))

    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={
            "probabilities": probabilities,
            "predictions": predicts,
        })
def model_fn(features, labels, mode, params):
    """TPUEstimator `model_fn` reporting a streaming confusion matrix.

    Builds the model through `create_model`, optionally restores
    `init_checkpoint`, and returns a `TPUEstimatorSpec`:

    * TRAIN: loss plus a train op from `optimization.create_optimizer`.
    * EVAL: loss plus a "confusion_matrix" metric weighted by `mask`.
    * anything else: predictions are `predicts`.

    Names that are not parameters (`bert_config`, `num_labels`,
    `init_checkpoint`, `learning_rate`, `num_train_steps`,
    `num_warmup_steps`, `use_tpu`, `use_one_hot_embeddings`) are free
    variables from the enclosing scope.

    Args:
      features: dict of tensors; read keys are "input_ids", "mask",
        "segment_ids" and "label_ids".
      labels: unused.
      mode: a `tf.estimator.ModeKeys` value.
      params: unused.

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec`.
    """
    logging.info("*** Features ***")
    for name in sorted(features.keys()):
        logging.info(" name = %s, shape = %s", name, features[name].shape)

    input_ids = features["input_ids"]
    mask = features["mask"]
    segment_ids = features["segment_ids"]
    label_ids = features["label_ids"]

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    # The model is built with the same arguments whether or not the CRF flag
    # is set, so a single call covers both cases.
    (total_loss, logits, predicts) = create_model(
        bert_config, is_training, input_ids, mask, segment_ids, label_ids,
        num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    scaffold_fn = None
    # An empty container (not None) keeps the membership test in the logging
    # loop valid when no checkpoint is supplied.
    initialized_variable_names = {}

    if init_checkpoint:
        (assignment_map, initialized_variable_names
        ) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        # Register the restore exactly once: inside the scaffold on TPU,
        # directly otherwise.
        if use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        logging.info(" name = %s, shape = %s%s", var.name, var.shape,
                     init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    elif mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(label_ids, logits, num_labels, mask):
            """Return the mask-weighted streaming confusion matrix."""
            predictions = tf.math.argmax(
                logits, axis=-1, output_type=tf.int32)
            # NOTE(review): the matrix is sized `num_labels - 1`; presumably
            # one label id is excluded on purpose - confirm.
            cm = metrics.streaming_confusion_matrix(
                label_ids, predictions, num_labels - 1, weights=mask)
            return {"confusion_matrix": cm}

        eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metrics=eval_metrics,
            scaffold_fn=scaffold_fn)

    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)

    return output_spec
def model_fn(features, labels, mode, params):
    """Create the `TPUEstimatorSpec` for the variable-misuse model.

    The estimator framework dictates this signature. Task labels arrive
    inside `features`, so `labels` is ignored, and the runtime `params` are
    not consulted either; both are discarded immediately.
    """
    del labels, params

    tf.logging.info("*** Features ***")
    for feature_name in sorted(features):
        tf.logging.info(" name = %s, shape = %s", feature_name,
                        features[feature_name].shape)

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    candidate_mask = features["candidate_mask"]
    error_location_mask = features["error_location_mask"]
    target_mask = features["target_mask"]

    # Second dimension of `input_ids`; used as the one-hot depth in the
    # evaluation metrics.
    sequence_length = tf.shape(input_ids)[1]

    # Per-example weights: taken from the features when supplied, otherwise
    # every example counts fully.
    if "is_real_example" not in features:
        is_real_example = tf.ones(tf.shape(input_ids)[0], dtype=tf.float32)
    else:
        is_real_example = tf.cast(
            features["is_real_example"], dtype=tf.float32)

    training = mode == tf.estimator.ModeKeys.TRAIN
    model_outputs = create_original_varmisuse_model(
        bert_config=bert_config,
        is_training=training,
        enable_sequence_masking=enable_sequence_masking,
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids,
        candidate_mask=candidate_mask,
        target_mask=target_mask,
        error_location_mask=error_location_mask,
        use_one_hot_embeddings=use_one_hot_embeddings)
    total_loss, per_example_loss, _, probabilities = model_outputs

    trainable_vars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
        checkpoint_info = modeling.get_assignment_map_from_checkpoint(
            trainable_vars, init_checkpoint)
        assignment_map, initialized_variable_names = checkpoint_info
        if not use_tpu:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        else:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold

    tf.logging.info("**** Trainable Variables ****")
    for variable in trainable_vars:
        from_checkpoint = variable.name in initialized_variable_names
        init_string = ", *INIT_FROM_CKPT*" if from_checkpoint else ""
        tf.logging.info(" name = %s, shape = %s%s", variable.name,
                        variable.shape, init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = optimization.create_optimizer(
            total_loss, learning_rate, num_train_steps, num_warmup_steps,
            use_tpu)
        return contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            scaffold_fn=scaffold_fn)

    if mode != tf.estimator.ModeKeys.EVAL:
        # Every remaining mode only exposes the model probabilities.
        return contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={"probabilities": probabilities},
            scaffold_fn=scaffold_fn)

    def metric_fn(per_example_loss, probabilities, error_location_mask,
                  target_mask, is_real_example):
        """Compute classification, localization, repair and loss metrics."""
        is_buggy = tf.equal(error_location_mask[:, 0], 0)
        is_clean = tf.logical_not(is_buggy)

        location_probs, repair_probs = tf.unstack(probabilities, axis=2)
        location_guess = tf.argmax(
            location_probs, axis=1, output_type=tf.int32)
        repair_guess = tf.argmax(repair_probs, axis=1, output_type=tf.int32)
        guessed_clean = tf.equal(location_guess, 0)

        location_guess_one_hot = tf.one_hot(
            location_guess, sequence_length, dtype=tf.int32)
        repair_guess_one_hot = tf.one_hot(
            repair_guess, sequence_length, dtype=tf.int32)

        classification_accuracy = tf.metrics.accuracy(
            labels=is_clean,
            predictions=guessed_clean,
            weights=is_real_example)
        true_positive_rate = tf.metrics.accuracy(
            labels=is_clean,
            predictions=guessed_clean,
            weights=is_real_example * tf.cast(is_clean, tf.float32))

        location_hits = tf.reduce_sum(
            tf.multiply(location_guess_one_hot, error_location_mask), axis=1)
        # `target_mask` may mark several valid repair positions; the sum is
        # 1 when the predicted repair position is one of them.
        repair_hits = tf.reduce_sum(
            tf.multiply(repair_guess_one_hot, target_mask), axis=1)
        joint_hits = location_hits * repair_hits

        def accuracy_on_buggy(hits):
            """Accuracy of `hits`, weighted to real, buggy examples only."""
            return tf.metrics.accuracy(
                labels=tf.cast(is_buggy, tf.int32),
                predictions=hits,
                weights=is_real_example * tf.cast(is_buggy, tf.float32))

        localization_accuracy = accuracy_on_buggy(location_hits)
        repair_accuracy = accuracy_on_buggy(repair_hits)
        localization_repair_accuracy = accuracy_on_buggy(joint_hits)

        mean_loss = tf.metrics.mean(
            values=per_example_loss, weights=is_real_example)

        return {
            "eval_accuracy_classification": classification_accuracy,
            "eval_true_positive_rate": true_positive_rate,
            "eval_accuracy_localization": localization_accuracy,
            "eval_accuracy_repair": repair_accuracy,
            "eval_accuracy_localization_repair": localization_repair_accuracy,
            "eval_loss": mean_loss,
        }

    metric_inputs = [
        per_example_loss,
        probabilities,
        error_location_mask,
        target_mask,
        is_real_example,
    ]
    return contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=(metric_fn, metric_inputs),
        scaffold_fn=scaffold_fn)