def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.compat.v1.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.compat.v1.logging.info("  name = %s, shape = %s" %
                              (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, logits, probabilities) = create_model(
      is_training, input_ids, input_mask, segment_ids, label_ids, num_labels,
      bert_hub_module_handle)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu)
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(per_example_loss, label_ids, logits):
      predictions = tf.argmax(input=logits, axis=-1, output_type=tf.int32)
      accuracy = tf.compat.v1.metrics.accuracy(label_ids, predictions)
      loss = tf.compat.v1.metrics.mean(per_example_loss)
      return {
          "eval_accuracy": accuracy,
          "eval_loss": loss,
      }

    eval_metrics = (metric_fn, [per_example_loss, label_ids, logits])
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, loss=total_loss, eval_metrics=eval_metrics)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode, predictions={"probabilities": probabilities})
  else:
    raise ValueError(
        "Only TRAIN, EVAL and PREDICT modes are supported: %s" % (mode))

  return output_spec
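
# A minimal sketch of how a `model_fn` like the one above is handed to
# TPUEstimator. Note that `num_labels`, `learning_rate`, `num_train_steps`,
# and friends are free variables here; in the BERT repo they are closed over
# by an enclosing builder function. The run-config values, batch sizes, and
# `train_input_fn` below are hypothetical placeholders, not part of the
# original code.
run_config = tf.compat.v1.estimator.tpu.RunConfig(
    model_dir="/tmp/classifier_model",  # hypothetical output directory
    save_checkpoints_steps=1000)
estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
    use_tpu=False,  # with use_tpu=False the same spec runs on CPU/GPU
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8)
# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)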
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.compat.v1.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.compat.v1.logging.info("  name = %s, shape = %s" %
                              (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  masked_lm_positions = features["masked_lm_positions"]
  masked_lm_ids = features["masked_lm_ids"]
  masked_lm_weights = features["masked_lm_weights"]
  next_sentence_labels = features["next_sentence_labels"]

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  (masked_lm_loss, masked_lm_example_loss,
   masked_lm_log_probs) = get_masked_lm_output(
       bert_config, model.get_sequence_output(), model.get_embedding_table(),
       masked_lm_positions, masked_lm_ids, masked_lm_weights)

  (next_sentence_loss, next_sentence_example_loss,
   next_sentence_log_probs) = get_next_sentence_output(
       bert_config, model.get_pooled_output(), next_sentence_labels)

  total_loss = masked_lm_loss + next_sentence_loss

  tvars = tf.compat.v1.trainable_variables()

  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.compat.v1.train.init_from_checkpoint(init_checkpoint,
                                                assignment_map)
        return tf.compat.v1.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.compat.v1.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                              init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu)
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                  masked_lm_weights, next_sentence_example_loss,
                  next_sentence_log_probs, next_sentence_labels):
      """Computes the loss and accuracy of the model."""
      masked_lm_log_probs = tf.reshape(masked_lm_log_probs,
                                       [-1, masked_lm_log_probs.shape[-1]])
      masked_lm_predictions = tf.argmax(
          input=masked_lm_log_probs, axis=-1, output_type=tf.int32)
      masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
      masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
      masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
      masked_lm_accuracy = tf.compat.v1.metrics.accuracy(
          labels=masked_lm_ids,
          predictions=masked_lm_predictions,
          weights=masked_lm_weights)
      masked_lm_mean_loss = tf.compat.v1.metrics.mean(
          values=masked_lm_example_loss, weights=masked_lm_weights)

      next_sentence_log_probs = tf.reshape(
          next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])
      next_sentence_predictions = tf.argmax(
          input=next_sentence_log_probs, axis=-1, output_type=tf.int32)
      next_sentence_labels = tf.reshape(next_sentence_labels, [-1])
      next_sentence_accuracy = tf.compat.v1.metrics.accuracy(
          labels=next_sentence_labels,
          predictions=next_sentence_predictions)
      next_sentence_mean_loss = tf.compat.v1.metrics.mean(
          values=next_sentence_example_loss)

      return {
          "masked_lm_accuracy": masked_lm_accuracy,
          "masked_lm_loss": masked_lm_mean_loss,
          "next_sentence_accuracy": next_sentence_accuracy,
          "next_sentence_loss": next_sentence_mean_loss,
      }

    eval_metrics = (metric_fn, [
        masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
        masked_lm_weights, next_sentence_example_loss, next_sentence_log_probs,
        next_sentence_labels
    ])
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    raise ValueError("Only TRAIN and EVAL modes are supported: %s" % (mode))

  return output_spec
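
# Why the `tpu_scaffold` indirection in the model_fn above: on TPU, the
# checkpoint-initialization ops must be created when TPUEstimator builds its
# Scaffold, so the `init_from_checkpoint` call is wrapped in a function and
# deferred rather than executed directly at graph-construction time (on
# CPU/GPU the direct call in the `else` branch suffices). A standalone
# sketch of the same pattern; the checkpoint path and assignment map are
# whatever the surrounding code supplies:
def make_scaffold_fn(init_checkpoint, assignment_map):
  def scaffold_fn():
    # Runs inside TPUEstimator's scaffold construction.
    tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
    return tf.compat.v1.train.Scaffold()
  return scaffold_fn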
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
  """The `model_fn` for TPUEstimator."""
  tf.compat.v1.logging.info("*** Features ***")
  for name in sorted(features.keys()):
    tf.compat.v1.logging.info("  name = %s, shape = %s" %
                              (name, features[name].shape))

  input_ids = features["input_ids"]
  input_mask = features["input_mask"]
  segment_ids = features["segment_ids"]
  label_ids = features["label_ids"]
  is_real_example = None
  if "is_real_example" in features:
    is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
  else:
    is_real_example = tf.ones(tf.shape(input=label_ids), dtype=tf.float32)

  is_training = (mode == tf.estimator.ModeKeys.TRAIN)

  (total_loss, per_example_loss, logits, probabilities) = create_model(
      bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
      num_labels, use_one_hot_embeddings)

  tvars = tf.compat.v1.trainable_variables()
  initialized_variable_names = {}
  scaffold_fn = None
  if init_checkpoint:
    (assignment_map, initialized_variable_names
    ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
    if use_tpu:

      def tpu_scaffold():
        tf.compat.v1.train.init_from_checkpoint(init_checkpoint,
                                                assignment_map)
        return tf.compat.v1.train.Scaffold()

      scaffold_fn = tpu_scaffold
    else:
      tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)

  tf.compat.v1.logging.info("**** Trainable Variables ****")
  for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
      init_string = ", *INIT_FROM_CKPT*"
    tf.compat.v1.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                              init_string)

  output_spec = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op = optimization.create_optimizer(total_loss, learning_rate,
                                             num_train_steps, num_warmup_steps,
                                             use_tpu)
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
        scaffold_fn=scaffold_fn)
  elif mode == tf.estimator.ModeKeys.EVAL:

    def metric_fn(per_example_loss, label_ids, logits, is_real_example):
      predictions = tf.argmax(input=logits, axis=-1, output_type=tf.int32)
      accuracy = tf.compat.v1.metrics.accuracy(
          labels=label_ids, predictions=predictions, weights=is_real_example)
      loss = tf.compat.v1.metrics.mean(
          values=per_example_loss, weights=is_real_example)
      return {
          "eval_accuracy": accuracy,
          "eval_loss": loss,
      }

    eval_metrics = (metric_fn,
                    [per_example_loss, label_ids, logits, is_real_example])
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metrics=eval_metrics,
        scaffold_fn=scaffold_fn)
  else:
    output_spec = tf.compat.v1.estimator.tpu.TPUEstimatorSpec(
        mode=mode,
        predictions={"probabilities": probabilities},
        scaffold_fn=scaffold_fn)
  return output_spec
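
# The `is_real_example` weighting in the model_fn above exists because TPU
# batches have a fixed size: the final eval batch is padded with fake
# examples, and a 0.0 weight drops them from the metrics. A minimal sketch of
# the effect with hypothetical values (`tf.compat.v1.metrics.accuracy`
# returns a (value_op, update_op) pair):
toy_labels = tf.constant([1, 0, 1, 0], dtype=tf.int32)
toy_predictions = tf.constant([1, 0, 0, 0], dtype=tf.int32)
toy_weights = tf.constant([1.0, 1.0, 1.0, 0.0])  # last example is padding
acc_value, acc_update = tf.compat.v1.metrics.accuracy(
    labels=toy_labels, predictions=toy_predictions, weights=toy_weights)
# The padded example contributes nothing, so accuracy is averaged over the
# three real examples.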
def main():
    print("start loading the params...")
    print(json.dumps(config, ensure_ascii=False, indent=2))

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(config["out"])
    tf.io.gfile.makedirs(config["train_logs_path"])
    tf.io.gfile.makedirs(config["dev_logs_path"])

    train_examples_len = config["train_examples_len"]
    dev_examples_len = config["dev_examples_len"]
    learning_rate_init = config["learning_rate"]
    eval_per_step = config["eval_per_step"]
    num_labels = config["num_labels"]

    num_train_steps = math.ceil(train_examples_len / config["train_batch_size"])
    num_dev_steps = math.ceil(dev_examples_len / config["dev_batch_size"])
    num_warmup_steps = math.ceil(num_train_steps * config["num_train_epochs"]
                                 * config["warmup_proportion"])
    print("num_train_steps:{}, num_dev_steps:{}, num_warmup_steps:{}".format(
        num_train_steps, num_dev_steps, num_warmup_steps))

    use_one_hot_embeddings = False
    is_training = True
    use_tpu = False
    seq_len = config["max_seq_len"]
    init_checkpoint = config["init_checkpoint"]
    print("start building the bert model...")

    # Define the model's input and output placeholders.
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, acc, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)
    train_op, learning_rate = optimization.create_optimizer(
        total_loss, learning_rate_init,
        num_train_steps * config["num_train_epochs"], num_warmup_steps, False)

    print("start training the bert model...")
    batch_size = config["train_batch_size"]
    dev_batch_size = config["dev_batch_size"]

    init_global = tf.global_variables_initializer()
    # Keep only the most recent checkpoints (max_to_keep=2); Adam slot
    # variables are excluded to shrink checkpoint size.
    saver = tf.train.Saver(
        [v for v in tf.global_variables()
         if 'adam_v' not in v.name and 'adam_m' not in v.name],
        max_to_keep=2)

    with tf.Session() as sess:
        sess.run(init_global)
        train_summary_writer = tf.summary.FileWriter(config["train_logs_path"],
                                                     sess.graph)
        dev_summary_writer = tf.summary.FileWriter(config["dev_logs_path"])
        print("start loading the pre-trained model")
        if init_checkpoint:
            # tvars = tf.global_variables()
            tvars = tf.trainable_variables()
            print("trainable_variables", len(tvars))
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            print("initialized_variable_names:", len(initialized_variable_names))
            saver_ = tf.train.Saver(
                [v for v in tvars if v.name in initialized_variable_names])
            saver_.restore(sess, init_checkpoint)

            tvars = tf.global_variables()
            initialized_vars = [v for v in tvars
                                if v.name in initialized_variable_names]
            not_initialized_vars = [v for v in tvars
                                    if v.name not in initialized_variable_names]
            tf.logging.info('--all size %s; not initialized size %s' %
                            (len(tvars), len(not_initialized_vars)))
            if len(not_initialized_vars):
                sess.run(tf.variables_initializer(not_initialized_vars))
            for v in initialized_vars:
                print('--initialized: %s, shape = %s' % (v.name, v.shape))
            for v in not_initialized_vars:
                print('--not initialized: %s, shape = %s' % (v.name, v.shape))
        else:
            sess.run(tf.global_variables_initializer())
            # if init_checkpoint:
            #     saver.restore(sess, init_checkpoint)
            #     print("checkpoint restored from %s" % init_checkpoint)
        print("********* train start *********")

        # albert removes dropout
        def train_step(ids, mask, segment, y, step):
            feed = {input_ids: ids, input_mask: mask, segment_ids: segment,
                    labels: y, keep_prob: 0.9}
            _, lr, out_loss, acc_, p_ = sess.run(
                [train_op, learning_rate, total_loss, acc, probabilities],
                feed_dict=feed)
            # Write scalar summaries with the TF1 FileWriter API (the original
            # mixed in the TF2 `writer.as_default()` pattern, which a TF1
            # FileWriter does not support).
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='learning_rate', simple_value=float(lr)),
                tf.Summary.Value(tag='loss', simple_value=float(out_loss)),
                tf.Summary.Value(tag='accuracy', simple_value=float(acc_)),
            ])
            train_summary_writer.add_summary(summary, step)
            print("step :{}, lr:{}, loss :{}, acc :{}".format(
                step, lr, out_loss, acc_))
            return out_loss, p_, y

        def dev_step(ids, mask, segment, y, step):
            feed = {input_ids: ids, input_mask: mask, segment_ids: segment,
                    labels: y, keep_prob: 1.0}
            out_loss, acc_, p_ = sess.run([total_loss, acc, probabilities],
                                          feed_dict=feed)
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='dev_loss', simple_value=float(out_loss)),
                tf.Summary.Value(tag='dev_accuracy', simple_value=float(acc_)),
            ])
            dev_summary_writer.add_summary(summary, step)
            print("loss :{}, acc :{}".format(out_loss, acc_))
            return out_loss, p_, y

        # min_total_loss_dev = 999999
        min_total_loss_dev = np.inf
        step = 0
        for epoch in range(config["num_train_epochs"]):
            print("{:*^100s}".format(("epoch-" + str(epoch)).center(20)))
            # Load the training data for this epoch.
            total_loss_train = 0
            # total_pre_train = []
            # total_true_train = []
            input_ids2, input_mask2, segment_ids2, labels2 = get_input_data(
                config["in_1"], seq_len, batch_size)
            for i in range(num_train_steps):
                step += 1
                ids_train, mask_train, segment_train, y_train = sess.run(
                    [input_ids2, input_mask2, segment_ids2, labels2])
                out_loss, pre, y = train_step(ids_train, mask_train,
                                              segment_train, y_train, step)
                total_loss_train += out_loss
                # total_pre_train.extend(pre)
                # total_true_train.extend(y)

                if step % eval_per_step == 0 and step >= config["eval_start_step"]:
                    total_loss_dev = 0
                    (dev_input_ids2, dev_input_mask2, dev_segment_ids2,
                     dev_labels2) = get_input_data(config["in_2"], seq_len,
                                                   dev_batch_size, False)
                    # total_pre_dev = []
                    # total_true_dev = []
                    # Number of dev batches per evaluation pass.
                    for j in range(num_dev_steps):
                        ids_dev, mask_dev, segment_dev, y_dev = sess.run([
                            dev_input_ids2, dev_input_mask2, dev_segment_ids2,
                            dev_labels2])
                        out_loss, pre, y = dev_step(ids_dev, mask_dev,
                                                    segment_dev, y_dev, step)
                        total_loss_dev += out_loss
                        # total_pre_dev.extend(pre)
                        # total_true_dev.extend(y_dev)
                    print("total_loss_dev:{}".format(total_loss_dev))
                    # print(classification_report(total_true_dev, total_pre_dev, digits=4))

                    # Save a checkpoint only when the dev loss improves.
                    if total_loss_dev < min_total_loss_dev:
                        print("save model:\t%f\t>%f" %
                              (min_total_loss_dev, total_loss_dev))
                        min_total_loss_dev = total_loss_dev
                        saver.save(sess, config["out"] + 'bert.ckpt',
                                   global_step=step)
                elif step < config["eval_start_step"] and step % config["auto_save"] == 0:
                    saver.save(sess, config["out"] + 'bert.ckpt',
                               global_step=step)
            print("{:*^100s}".format(
                ("epoch-" + str(epoch) + " report:").center(20)))
            print("total_loss_train:{}".format(total_loss_train))
            # print(classification_report(total_true_train, total_pre_train, digits=4))
        train_summary_writer.close()
        dev_summary_writer.close()
        sess.close()

    # Rebuild the graph without dropout for prediction.
    print("remove dropout in predict")
    tf.reset_default_graph()
    is_training = False
    input_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_ids')
    input_mask = tf.placeholder(tf.int64, shape=[None, seq_len], name='input_mask')
    segment_ids = tf.placeholder(tf.int64, shape=[None, seq_len], name='segment_ids')
    labels = tf.placeholder(tf.int64, shape=[None, seq_len], name='labels')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')  # , name='is_training'

    bert_config_ = load_bert_config(config["bert_config"])
    (total_loss, _, logits, probabilities) = create_model(
        bert_config_, is_training, input_ids, input_mask, segment_ids, labels,
        keep_prob, num_labels, use_one_hot_embeddings)
    init_global = tf.global_variables_initializer()
    # Keep only the latest checkpoint for the exported prediction graph.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    try:
        checkpoint = tf.train.get_checkpoint_state(config["out"])
        input_checkpoint = checkpoint.model_checkpoint_path
        print("[INFO] input_checkpoint:", input_checkpoint)
    except Exception as e:
        input_checkpoint = config["out"]
        print("[INFO] Model folder", config["out"], repr(e))

    with tf.Session() as sess:
        sess.run(init_global)
        saver.restore(sess, input_checkpoint)
        saver.save(sess, config["out_1"] + 'bert.ckpt')
        sess.close()
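
# A minimal inference sketch against the checkpoint re-saved at the end of
# main(). It assumes the dropout-free prediction graph is still the default
# graph, so `saver`, `probabilities`, and the placeholders are in scope, and
# that config["out_1"] is a directory prefix; the zero-filled ids are
# hypothetical stand-ins for real tokenized input.
import numpy as np

def predict_batch(session, ids, mask, seg):
    """Runs the prediction head on one already-tokenized batch."""
    return session.run(probabilities, feed_dict={
        input_ids: ids,
        input_mask: mask,
        segment_ids: seg,
        keep_prob: 1.0,  # no dropout at inference time
    })

with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint(config["out_1"]))
    dummy = np.zeros((1, config["max_seq_len"]), dtype=np.int64)
    print(predict_batch(sess, dummy, dummy, dummy).shape)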