import os
import tensorflow as tf


def main(_):
    # 1.load (or build) the vocabulary and generate masked-language-model data.
    vocab_word2index, _ = create_or_load_vocabulary(
        FLAGS.data_path, FLAGS.mask_lm_source_file, FLAGS.vocab_size,
        test_mode=FLAGS.test_mode, tokenize_style=FLAGS.tokenize_style)
    vocab_size = len(vocab_word2index)
    print("bert_pertrain_lm.vocab_size:", vocab_size)
    index2word = {v: k for k, v in vocab_word2index.items()}
    train, valid, test = mask_language_model(
        FLAGS.mask_lm_source_file, FLAGS.data_path, index2word,
        max_allow_sentence_length=FLAGS.max_allow_sentence_length,
        test_mode=FLAGS.test_mode, process_num=FLAGS.process_num)
    train_X, train_y, train_p = train
    valid_X, valid_y, valid_p = valid
    test_X, test_y, test_p = test
    print("length of training data:", train_X.shape, ";train_y:", train_y.shape,
          ";train_p:", train_p.shape, ";valid data:", valid_X.shape,
          ";test data:", test_X.shape)

    # 2.create session.
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        # Instantiate model.
        config = set_config(FLAGS, vocab_size, vocab_size)
        model = BertModel(config)
        # Initialize saver.
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint.")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            for i in range(2):  # decay learning rate if necessary.
                print(i, "Going to decay learning rate by half.")
                sess.run(model.learning_rate_decay_half_op)
        else:
            print("Initializing Variables.")
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_pretrained_embedding:
                # assign pretrained word embeddings to the embedding variable.
                vocabulary_index2word = {index: word for word, index in vocab_word2index.items()}
                assign_pretrained_word_embedding(
                    sess, vocabulary_index2word, vocab_size,
                    FLAGS.word2vec_model_path, model.embedding, config.d_model)

        curr_epoch = sess.run(model.epoch_step)
        # 3.feed data & train the masked language model.
        number_of_training_data = len(train_X)
        print("number_of_training_data:", number_of_training_data)
        batch_size = FLAGS.batch_size
        iteration = 0
        score_best = -100
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss_total_lm, counter = 0.0, 0
            for start, end in zip(range(0, number_of_training_data, batch_size),
                                  range(batch_size, number_of_training_data, batch_size)):
                iteration = iteration + 1
                if epoch == 0 and counter == 0:
                    print("train_X[start:end]:", train_X[start:end],
                          "train_X.shape:", train_X.shape)
                feed_dict = {model.x_mask_lm: train_X[start:end],
                             model.y_mask_lm: train_y[start:end],
                             model.p_mask_lm: train_p[start:end],
                             model.dropout_keep_prob: FLAGS.dropout_keep_prob}
                current_loss_lm, lr, l2_loss, _ = sess.run(
                    [model.loss_val_lm, model.learning_rate, model.l2_loss_lm, model.train_op_lm],
                    feed_dict)
                loss_total_lm, counter = loss_total_lm + current_loss_lm, counter + 1
                if counter % 30 == 0:
                    print("%d\t%d\tLearning rate:%.5f\tLoss_lm:%.3f\tCurrent_loss_lm:%.3f\tL2_loss:%.3f\t"
                          % (epoch, counter, lr, float(loss_total_lm) / float(counter),
                             current_loss_lm, l2_loss))
                if start != 0 and start % (800 * FLAGS.batch_size) == 0:
                    # evaluate on the validation set periodically within the epoch.
                    loss_valid, acc_valid = do_eval(sess, model, valid, batch_size)
                    print("%d\tValid.Epoch %d ValidLoss:%.3f\tAcc_valid:%.3f\t"
                          % (counter, epoch, loss_valid, acc_valid * 100))
                    # save model to checkpoint when validation accuracy improves.
                    if acc_valid > score_best:
                        save_path = FLAGS.ckpt_dir + "model.ckpt"
                        print("going to save checkpoint.")
                        saver.save(sess, save_path, global_step=epoch)
                        score_best = acc_valid
            sess.run(model.epoch_increment)
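
# assign_pretrained_word_embedding is defined elsewhere in this repo. The sketch below
# (note the "_sketch" suffix: it is an illustrative assumption, not the repo's actual
# implementation) shows one common way such a helper works: load a word2vec model with
# gensim and copy each known word's vector into the model's embedding variable, leaving
# out-of-vocabulary words with a small random initialization. It assumes embed_size
# matches the dimensionality of the word2vec vectors.
import numpy as np
from gensim.models import KeyedVectors

def assign_pretrained_word_embedding_sketch(sess, vocabulary_index2word, vocab_size,
                                            word2vec_model_path, embedding_variable, embed_size):
    word2vec = KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)
    bound = np.sqrt(6.0) / np.sqrt(vocab_size)
    # start from small random vectors so words missing from word2vec still get initialized.
    embedding_matrix = np.random.uniform(-bound, bound, (vocab_size, embed_size)).astype(np.float32)
    covered = 0
    for index in range(vocab_size):
        word = vocabulary_index2word[index]
        if word in word2vec:
            embedding_matrix[index] = word2vec[word]
            covered += 1
    # overwrite the graph's embedding variable with the pretrained matrix.
    sess.run(tf.assign(embedding_variable, embedding_matrix))
    print("pretrained embedding coverage: %d/%d words." % (covered, vocab_size))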
def main(_):
    # 1.load the token vocabulary from the cache file saved during the pre-training stage;
    #   load the label dict; print some messages.
    vocab_word2index, _ = create_or_load_vocabulary(
        FLAGS.data_path, FLAGS.training_data_file, FLAGS.vocab_size,
        test_mode=FLAGS.test_mode, tokenize_style=FLAGS.tokenize_style)
    # label2index = get_lable2index(FLAGS.data_path, FLAGS.training_data_file, tokenize_style=FLAGS.tokenize_style)
    label2index = {'0': 0, '1': 1, '2': 2, '3': 3}
    vocab_size = len(vocab_word2index)
    print("cnn_model.vocab_size:", vocab_size)
    num_classes = len(label2index)
    print("num_classes:", num_classes)
    # load training, validation and test data.
    train, valid, test = load_data_multilabel(
        FLAGS.data_path, FLAGS.training_data_file, FLAGS.valid_data_file, FLAGS.test_data_file,
        vocab_word2index, label2index, FLAGS.sequence_length,
        process_num=FLAGS.process_num, test_mode=FLAGS.test_mode,
        tokenize_style=FLAGS.tokenize_style)
    train_X, train_Y = train
    valid_X, valid_Y = valid
    test_X, test_Y = test
    print("test_mode:", FLAGS.test_mode, ";length of training data:", train_X.shape,
          ";valid data:", valid_X.shape, ";test data:", test_X.shape,
          ";train_Y:", train_Y.shape)

    # 2.create session.
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        # Instantiate model.
        config = set_config(FLAGS, num_classes, vocab_size)
        model = BertModel(config)
        # Initialize saver.
        saver = tf.train.Saver()
        # if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
        if True:
            print("Restoring Variables from Checkpoint.")
            sess.run(tf.global_variables_initializer())
            for i in range(6):  # decay learning rate if necessary.
                print(i, "Going to decay learning rate by a factor of " + str(FLAGS.decay_rate))
                sess.run(model.learning_rate_decay_half_op)
            # restore only those variables whose names and shapes exist in the current model;
            # for details check: https://gist.github.com/iganichev/d2d8a0b1abc6b15d4a07de83171163d4
            optimistic_restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            # saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Initializing Variables as no model checkpoint exists.")
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_pretrained_embedding:
                # assign pretrained word embeddings to the embedding variable.
                vocabulary_index2word = {index: word for word, index in vocab_word2index.items()}
                assign_pretrained_word_embedding(
                    sess, vocabulary_index2word, vocab_size,
                    FLAGS.word2vec_model_path, model.embedding, config.d_model)

        curr_epoch = sess.run(model.epoch_step)
        # 3.feed data & training.
        number_of_training_data = len(train_X)
        batch_size = FLAGS.batch_size
        iteration = 0
        score_best = -100
        f1_score = 0
        epoch = 0
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss_total, counter = 0.0, 0
            for start, end in zip(range(0, number_of_training_data, batch_size),
                                  range(batch_size, number_of_training_data, batch_size)):
                iteration = iteration + 1
                if epoch == 0 and counter == 0:
                    print("train_X[start:end]:", train_X[start:end],
                          "train_X.shape:", train_X.shape)
                feed_dict = {model.input_x: train_X[start:end],
                             model.input_y: train_Y[start:end],
                             model.dropout_keep_prob: FLAGS.dropout_keep_prob}
                current_loss, lr, l2_loss, _ = sess.run(
                    [model.loss_val, model.learning_rate, model.l2_loss, model.train_op],
                    feed_dict)
                loss_total, counter = loss_total + current_loss, counter + 1
                if counter % 30 == 0:
                    print("Learning rate:%.7f\tLoss:%.3f\tCurrent_loss:%.3f\tL2_loss:%.3f\t"
                          % (lr, float(loss_total) / float(counter), current_loss, l2_loss))
                # if start != 0 and start % (1000 * FLAGS.batch_size) == 0:
                #     loss_valid, f1_macro_valid, f1_micro_valid = do_eval(sess, model, valid, num_classes, label2index)
                #     f1_score_valid = (f1_macro_valid + f1_micro_valid) / 2.0  # *100.0
                #     print("Valid.Epoch %d ValidLoss:%.3f\tF1_score_valid:%.3f\tMacro_f1:%.3f\tMicro_f1:%.3f\t"
                #           % (epoch, loss_valid, f1_score_valid, f1_macro_valid, f1_micro_valid))
                #     # save model to checkpoint
                #     if f1_score_valid > score_best:
                #         save_path = FLAGS.ckpt_dir_save + "model.ckpt"
                #         print("going to save checkpoint.")
                #         saver.save(sess, save_path, global_step=epoch)
                #         score_best = f1_score_valid

            # epoch increment
            print("going to increment epoch counter....")
            sess.run(model.epoch_increment)

            # 4.validation
            print(epoch, FLAGS.validate_every, epoch % FLAGS.validate_every == 0)
            if epoch % FLAGS.validate_every == 0:
                # loss_valid, f1_macro_valid2, f1_micro_valid2 = do_eval(sess, model, valid, num_classes, label2index)
                # f1_score_valid2 = (f1_macro_valid2 + f1_micro_valid2) / 2.0  # *100.0
                # print("Valid.Epoch %d ValidLoss:%.3f\tF1 score:%.3f\tMacro_f1:%.3f\tMicro_f1:%.3f\t"
                #       % (epoch, loss_valid, f1_score_valid2, f1_macro_valid2, f1_micro_valid2))
                print("epoch %d" % epoch)
                do_eval_acc(sess, model, valid, num_classes, label2index)
                # save model to checkpoint:
                # if f1_score_valid2 > score_best:
                #     save_path = FLAGS.ckpt_dir_save + "model.ckpt"
                #     print("going to save checkpoint.")
                #     saver.save(sess, save_path, global_step=epoch)
                #     score_best = f1_score_valid2
            if epoch in (2, 4, 6, 9, 13):
                # decay learning rate at fixed epochs.
                print("Going to decay learning rate by half.")
                sess.run(model.learning_rate_decay_half_op)

        # 5.report on test set
        # loss_test, f1_macro_test, f1_micro_test = do_eval(sess, model, test, num_classes, label2index)
        # f1_score_test = ((f1_macro_test + f1_micro_test) / 2.0) * 100.0
        # print("Test.Epoch %d TestLoss:%.3f\tF1_score:%.3f\tMacro_f1:%.3f\tMicro_f1:%.3f\t"
        #       % (epoch, loss_test, f1_score_test, f1_macro_test, f1_micro_test))
        print("test data:")
        do_eval_acc(sess, model, test, num_classes, label2index)
        print("training completed...")
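
# optimistic_restore is not defined in this file. The sketch below (again suffixed "_sketch"
# to mark it as an assumption) follows the gist referenced above
# (https://gist.github.com/iganichev/d2d8a0b1abc6b15d4a07de83171163d4): restore only the
# checkpoint variables whose names and shapes match variables in the current graph, so the
# newly added classification head keeps its fresh initialization while the shared pre-trained
# weights are loaded from the language-model checkpoint.
def optimistic_restore_sketch(session, save_file):
    reader = tf.train.NewCheckpointReader(save_file)
    saved_shapes = reader.get_variable_to_shape_map()
    # keep only graph variables that also exist in the checkpoint with an identical shape.
    restore_vars = [var for var in tf.global_variables()
                    if var.name.split(':')[0] in saved_shapes
                    and var.get_shape().as_list() == saved_shapes[var.name.split(':')[0]]]
    tf.train.Saver(restore_vars).restore(session, save_file)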