def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()
    task_num = 1
    name = "des" if task_num == 1 else "opr"

    # train_f = "./data/exp0803/training_dynamic_data.txt"
    # dev_f = "./data/exp0803/validation_dynamic_data.txt"
    # test_f = "./data/exp0803/test_dynamic_data.txt"
    # prepara_dynamic_tensor(inputH, train_f, dev_f, test_f, FLAGS.max_sequence_len, FLAGS.max_sequence_len2)

    time_gen = "0823"
    data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt")
    data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt")
    data_file_val = data_file_test
    # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt")

    prepara_tensor_y_seperate(inputH, data_file, data_file_val, data_file_test, "\t",
                              FLAGS.max_sequence_len, name, task_num)
def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()

    date_f = "0823"
    train_f = "./data/exp" + date_f + "/data_augment_train.txt"
    test_f = "./data/exp" + date_f + "/data_augment_test.txt"
    dev_f = test_f

    our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len) + "/"
    x_train_tensor = np.load(our_dir + "train_des.npy")
    # x_dev_tensor = np.load(our_dir + "dev_des.npy")
    x_test_tensor = np.load(our_dir + "test_des.npy")
    x_dev_tensor = x_test_tensor

    our_dir = "./Tensor_files/" + date_f + "/Length" + str(FLAGS.max_sequence_len2) + "/"
    x_train_tensor_o = np.load(our_dir + "train_opr.npy")
    # x_dev_tensor_o = np.load(our_dir + "dev_opr.npy")
    x_test_tensor_o = np.load(our_dir + "test_opr.npy")
    x_dev_tensor_o = x_test_tensor_o

    def normalize(a):
        amin, amax = a.min(), a.max()  # find the minimum and maximum
        a = (a - amin) / (amax - amin)  # (element - min) / (max - min)
        return a

    def normalize_tensor(t):
        t[:, :, :, 0] = normalize(t[:, :, :, 0])
        t[:, :, :, 1] = normalize(t[:, :, :, 1])
        t[:, :, :, 2] = normalize(t[:, :, :, 2])
        t[:, :, :, 3] = normalize(t[:, :, :, 3])
        return t

    # only the 4th channel is rescaled here
    x_test_tensor[:, :, :, 3] = normalize(x_test_tensor[:, :, :, 3])
    x_train_tensor[:, :, :, 3] = normalize(x_train_tensor[:, :, :, 3])
    x_test_tensor_o[:, :, :, 3] = normalize(x_test_tensor_o[:, :, :, 3])
    x_train_tensor_o[:, :, :, 3] = normalize(x_train_tensor_o[:, :, :, 3])
    # x_test_tensor = normalize_tensor(x_test_tensor)
    # x_test_tensor_o = normalize_tensor(x_test_tensor_o)
    # x_train_tensor = normalize_tensor(x_train_tensor)
    # x_train_tensor_o = normalize_tensor(x_train_tensor_o)

    sep = "\t"
    x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels(
        train_f, sep, FLAGS.max_sequence_len)
    # x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels(dev_f, sep, FLAGS.max_sequence_len)
    x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels(
        test_f, sep, FLAGS.max_sequence_len)
    x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = x1_test, x2_test, x3_test, x4_test, y_test, y2_test

    des_e_names, des_opr_map = load_coocurrence_matrix("coorrence_file.txt")
    N_default = 0.01
    co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test, N_default)
    co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train, x4_train, N_default)
    # co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev)
    co_arr_val = co_arr_test

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            cnn = MultiTask_MultiGranModel(
                max_len1=FLAGS.max_sequence_len,
                max_len2=FLAGS.max_sequence_len2,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                filter_sizes2=list(map(int, FLAGS.filter_sizes2.split(","))),
                pool_sizes=list(map(int, FLAGS.pool_sizes.split(","))),
                pool_sizes2=list(map(int, FLAGS.pool_sizes2.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                constraint_lambda=FLAGS.con_lambda,
                alpha=FLAGS.alpha,
                type_CNN=FLAGS.type_CNN,
                view_num=FLAGS.view_num,
                view_nums=list(map(int, FLAGS.view_nums.split(","))))

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            saver = tf.train.Saver(tf.all_variables(), max_to_keep=20)

            # Keep track of gradient values and sparsity (optional)
            for g, v in grads_and_vars:
                if g is not None:
                    tf.summary.histogram("grad_hist/{}".format(v.name), g)
tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g)) tf.summary.histogram(v.name, v) # Output directory for models and summaries timestamp = str(int(time.time())) out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "alpha_E", "multitask" + timestamp)) if not os.path.exists(out_dir): os.makedirs(out_dir) print("Writing to {}\n".format(out_dir)) checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) # Summaries for loss and accuracy loss_summary = tf.summary.scalar("loss", cnn.loss) constraint_summary = tf.summary.scalar("constraints", cnn.constraints) acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d) acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o) # Train Summaries train_summary_op = tf.summary.merge_all() train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary1, acc_summary2, constraint_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Initialize all variables sess.run(tf.initialize_all_variables()) def train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr): feed_dict = { cnn.input_tensor: x_batch, cnn.input_y_description: y_batch, cnn.dropout_keep_prob: FLAGS.dropout_keep_prob, cnn.input_tensor_o: x_batch2, cnn.input_y_operation: y_batch2, cnn.matrix: co_arr, } _, step, summaries, loss, accuracy1, accuracy2 = sess.run( [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o], feed_dict) time_str = datetime.datetime.now().isoformat() if step % 10 == 0: print( "{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(time_str, step, loss, accuracy1, accuracy2)) train_summary_writer.add_summary(summaries, step) return accuracy1, accuracy2, loss def dev_step(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr, writer=None): feed_dict = { cnn.input_tensor: x_dev, cnn.input_y_description: y_batch_dev, cnn.dropout_keep_prob: 1.0, cnn.input_y_operation: y_batch_dev2, cnn.input_tensor_o: x_dev2, cnn.matrix: co_arr } step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], feed_dict) if writer: writer.add_summary(summaries, step) return loss, accuracy1, accuracy2 def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, co_arr): feed_dict = { cnn.input_tensor: x_dev, cnn.input_y_description: y_batch_dev, cnn.dropout_keep_prob: 1.0, cnn.input_y_operation: y_batch_dev2, cnn.input_tensor_o: x_dev2, cnn.matrix: co_arr } step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run( [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o], feed_dict) eval_file = open(out_dir + "/evaluation.txt", "w+") right_file = open(out_dir + "/right_cases.txt", "w+") right_file2 = open(out_dir + "/right_cases_operation.txt", "w+") wrong_file = open(out_dir + "/wrong_cases.txt", "w+") wrong_file2 = open(out_dir + "/wrong_cases_operation.txt", "w+") eval_file.write("Accu1: " + str(accuracy1) + "\n") eval_file.write("Accu2: " + str(accuracy2) + "\n") predictions1 = np.argmax(pres1, 1) predictions2 = np.argmax(pres2, 1) labels1 = np.argmax(y_batch_dev, 1) labels2 = np.argmax(y_batch_dev2, 1) 
                write_evaluation_file(eval_file, right_file, wrong_file, labels1, predictions1, x1_test, x2_test)
                write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, predictions2, x3_test, x4_test)

                eval_file.write("Parameters:")
                for attr, value in sorted(FLAGS.__flags.items()):
                    eval_file.write("{}={}".format(attr.upper(), value) + "\n")
                return loss, accuracy1, accuracy2

            def dev_whole(x_dev, y_dev, x_dev2, y_dev2, co_arr, writer=None):
                batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev, co_arr)), FLAGS.batch_size, 1, shuffle=False)
                batches_dev2 = inputH.batch_iter(list(zip(x_dev2, y_dev2)), FLAGS.batch_size, 1, shuffle=False)
                losses = []
                accuracies1 = []
                accuracies2 = []

                batches = zip(batches_dev, batches_dev2)
                for batches_dev, batches_dev2 in batches:
                    x_batch, y_batch, co_arr_ = zip(*batches_dev)
                    x_batch2, y_batch2 = zip(*batches_dev2)
                    loss, accuracy1, accuracy2 = dev_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_)
                    losses.append(loss)
                    accuracies1.append(accuracy1)
                    accuracies2.append(accuracy2)
                return np.mean(np.array(losses)), np.mean(np.array(accuracies1)), np.mean(np.array(accuracies2))

            def overfit(dev_loss, accu):
                # Early-stopping check: returns True only when `accu` does not improve on any of the
                # `early_stop_num - 1` values recorded just before it.
                num = FLAGS.early_stop_num
                n = len(dev_loss)
                if n < num:
                    return False
                for i in xrange(n - num, n - 1):
                    if dev_loss[i] < accu:
                        return False
                print(dev_loss)
                print(accu)
                return True

            # Generate batches
            batches = inputH.batch_iter(list(zip(x_train_tensor, y_train, x_train_tensor_o, y2_train, co_arr_train)),
                                        FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_loss = []
            train_loss = []
            train_accu = []
            train_accu2 = []
            dev_accu = []
            dev_accu2 = []
            # batch_d_o = zip(batches, batches2)
            optimum_accu1 = 0
            optimum_accu2 = 0
            data_num = len(y_train)
            num_batches_per_epoch = int(data_num / FLAGS.batch_size)
            # t = num_batches_per_epoch / 2
            optimum_loss = 1000

            for batch in batches:
                x_batch, y_batch, x_batch2, y_batch2, co_arr_batch = zip(*batch)
                acc1, acc2, loss_train = train_step(x_batch, y_batch, x_batch2, y_batch2, co_arr_batch)
                train_accu.append(acc1)
                train_accu2.append(acc2)
                train_loss.append(loss_train)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % num_batches_per_epoch == 0:
                    print("\nEvaluation:")
                    loss, accuracy1, accuracy2 = dev_whole(x_dev_tensor, y_dev, x_dev_tensor_o, y2_dev, co_arr_val,
                                                           writer=dev_summary_writer)

                    summary = tf.Summary()
                    summary.value.add(tag="Accuracy_Dev", simple_value=accuracy1)
                    summary.value.add(tag="Accuracy2_Dev", simple_value=accuracy2)
                    summary.value.add(tag="Loss_Dev", simple_value=loss)
                    dev_summary_writer.add_summary(summary, current_step)

                    time_str = datetime.datetime.now().isoformat()
                    print("{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(time_str, loss, accuracy1, accuracy2))
                    dev_accu.append(accuracy1)
                    dev_accu2.append(accuracy2)
                    dev_loss.append(loss)
                    print("\nRecently accuracy:")
                    print(dev_accu[-10:])
                    print(dev_accu2[-10:])

                    # if loss < optimum_loss:
                    #     optimum_loss = loss
                    #     stop_early = 0
                    #     optimum_accu1 = accuracy1
                    #     optimum_accu2 = accuracy2
                    #     path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    #     print("Saved model checkpoint to {}\n".format(path))
                    # else:
                    #     stop_early += 1
                    #     if stop_early == 10:
                    #         break

                    if FLAGS.early_stop:
                        if overfit(dev_accu, accuracy1) or overfit(dev_accu2, accuracy2):
                            print('Overfit!!')
                            print(current_step)
                            print(current_step / num_batches_per_epoch)
                            break

                    print("")
                    if accuracy1 > optimum_accu1 and accuracy2 > optimum_accu2:
                        optimum_accu1 = accuracy1
                        optimum_accu2 = accuracy2
                        path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                        print("Saved model checkpoint to {}\n".format(path))
                        print("Optimum_accu1: " + str(optimum_accu1))
                        print("Optimum_accu2: " + str(optimum_accu2))

            print("Optimum_accu1: " + str(optimum_accu1))
            print("Optimum_accu2: " + str(optimum_accu2))

    import matplotlib.pyplot as plt

    # def plot_plots(y1, y2, name_task, type_eval):
    #     x1 = np.arange(len(y1))
    #     x2 = np.arange(len(y2))
    #     p1, = plt.plot(x1, y1, 'b', label="Validation")
    #     p2, = plt.plot(x2, y2, 'r', label="Train")
    #     plt.legend(handles=[p1, p2], numpoints=1)  # make legend
    #     plt.title(name_task + "_" + type_eval)
    #     plt.savefig(os.path.join(out_dir, name_task + "_" + type_eval + ".png"))
    #
    # plot_plots(dev_accu, train_accu, "Disease", "Accu")
    # plot_plots(dev_accu2, train_accu2, "Operation", "Accu")
    # plot_plots(dev_loss, train_loss, "MTL", "Loss")

    # evaluate the result with the best model
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    checkpoint_file = ckpt.model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session()
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            sess.run(tf.initialize_all_variables())
            saver.restore(sess, checkpoint_file)

            cooccur = graph.get_operation_by_name("cooccurence").outputs[0]
            input_t1 = graph.get_operation_by_name("input_tensor_description").outputs[0]
            input_t2 = graph.get_operation_by_name("input_tensor_operation").outputs[0]
            input_y1 = graph.get_operation_by_name("input_y_description").outputs[0]
            input_y2 = graph.get_operation_by_name("input_y_operation").outputs[0]
            prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            loss_opr = graph.get_operation_by_name("loss/loss").outputs[0]
            prediction = graph.get_operation_by_name("output/predictions1").outputs[0]
            prediction2 = graph.get_operation_by_name("output/predictions2").outputs[0]
            accu = graph.get_operation_by_name("accuracy/accuracy_d").outputs[0]
            accu2 = graph.get_operation_by_name("accuracy/accuracy_o").outputs[0]

            loss, pres1, pres2, accuracy1, accuracy2 = sess.run(
                [loss_opr, prediction, prediction2, accu, accu2],
                {input_t1: x_test_tensor, input_y1: y_test, cooccur: co_arr_test,
                 input_t2: x_test_tensor_o, input_y2: y2_test, prob: 1})

            eval_file = open(out_dir + "/evaluation.txt", "w+")
            right_file = open(out_dir + "/right_cases.txt", "w+")
            wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
            right_file2 = open(out_dir + "/right_cases_opr.txt", "w+")
            wrong_file2 = open(out_dir + "/wrong_cases_opr.txt", "w+")

            eval_file.write("Accu1: " + str(accuracy1) + "\n")
            eval_file.write("Accu2: " + str(accuracy2) + "\n")
            # eval_file.write("Stopped at: " + str(int(current_step / num_batches_per_epoch)) + "\n")
            eval_file.write("Default: " + str(N_default) + "\n")

            labels1 = np.argmax(y_test, 1)
            labels2 = np.argmax(y2_test, 1)
            write_evaluation_file(eval_file, right_file, wrong_file, labels1, pres1, x1_test, x2_test)
            write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, pres2, x3_test, x4_test)

            eval_file.write("Parameters:")
            for attr, value in sorted(FLAGS.__flags.items()):
                eval_file.write("{}={}".format(attr.upper(), value) + "\n")

            print("loss:" + str(loss))
            print("accuracy1:" + str(accuracy1))
            print("accuracy2:" + str(accuracy2))
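# A minimal, self-contained sketch of the per-channel min-max rescaling applied to the similarity
# tensors above (NumPy only; the toy shape is an illustrative assumption, not the real tensor size):
import numpy as np

toy = np.random.rand(2, 10, 10, 4) * 5.0                    # (batch, len1, len2, channels)
ch = toy[:, :, :, 3]
toy[:, :, :, 3] = (ch - ch.min()) / (ch.max() - ch.min())   # rescale channel 3 into [0, 1]
assert toy[:, :, :, 3].min() == 0.0 and toy[:, :, :, 3].max() == 1.0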
k = plot_activation(sample_idnex, k, row_n, col_n, conv1, "Str")
k = plot_activation(sample_idnex, k, row_n, col_n, conv2, "Character Embedding")
k = plot_activation(sample_idnex, k, row_n, col_n, conv3, "Word Embedding")
plot_activation(sample_idnex, k, row_n, col_n, conv4, "Sentence Embedding")
# plt.colorbar()
plt.savefig(dir_ + str(sample_idnex) + "activations_color" + ".jpg")


tf.flags.DEFINE_string("train_dir", "./", "Training dir root")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

task_num = 1
inpH = InputHelper()
max_document_length = 10
dir_ = "fig/"

model_dir = "./runs/NewExp/Single_task11503543419"
checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path

# mydir = "./Length" + str(max_document_length) + "/"
# x_test_tensor = np.load(mydir + "test_des" + ".npy")
x_test_tensor = np.load("./Tensor_files/0823/Length10/test_des.npy")

graph = tf.Graph()
with graph.as_default():
import tensorflow as tf
import os
from DLDisambiguation.util.input_helpers import InputHelper
from tensor import Tensor
import numpy as np
from util.util import write_evaluation_file

tf.flags.DEFINE_string("train_dir", "./", "Training dir root")
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()

task_num = 2
inpH = InputHelper()
max_document_length = 20
name = "des" if task_num == 1 else "opr"

# load in model
model_dir = "./runs/Single_task21501595265"
checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path

lstm_dir = "Description1500991322" if task_num == 1 else "Operation1501000120"
lstm_dir = os.path.join("./Sentence_Modeling/runs", lstm_dir)

# load data
load_Tensor = True
from DLDisambiguation.util.input_helpers import InputHelper
from DLDisambiguation.util.preprocess import MyVocabularyProcessor
import tensorflow as tf
import os

# tf.flags.DEFINE_string("test_file", "../data/validation_data_0724_opr.txt", "test file (default: None)")
tf.flags.DEFINE_string("test_file", "../data/test_data_0816_des.txt", "test file (default: None)")
FLAGS = tf.flags.FLAGS

inpH = InputHelper()
max_document_length = 10
y_is_value = True

model_dir = "./Exp/runs/Description1502955472"
# model_dir = "./runs/Description1500991322"  # 0.760
# model_dir = "./runs/1500428748"  # 0.69
# model_dir = "./runs/Description1500983617"  # 0.767
# model_dir = "./runs/Description1501058401"  # 0.754
# model_dir = "./runs/Operation1501000120"  # 0.809
checkpoint_dir = os.path.join(model_dir, "checkpoints")
print(checkpoint_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
checkpoint_file = ckpt.model_checkpoint_path

vocab_file = os.path.join(checkpoint_dir, "vocab")
# load vocabulary model
vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0)
vocab_processor = vocab_processor.restore(vocab_file)
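# A minimal usage sketch of the restored vocabulary processor (an assumption based on the
# transform() generator that get_data() uses elsewhere in this repo): it maps raw strings to
# rows of token ids, which is what the restored model is fed at test time.
import numpy as np

example_ids = np.asarray(list(vocab_processor.transform(["sample mention text"])))
print(example_ids.shape)  # expected to be (1, max_document_length)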
def main():
    # Load data
    print("Loading data...")
    inputH = InputHelper()

    train_f = os.path.join(FLAGS.train_dir, 'data/exp0803/training_dynamic_data.txt')
    dev_f = os.path.join(FLAGS.train_dir, 'data/exp0803/validation_dynamic_data.txt')
    test_f = os.path.join(FLAGS.train_dir, 'data/exp0803/test_dynamic_data.txt')

    our_dir = "./Tensor_files/0803_dynamic/"
    # our_dir = "./Length" + str(FLAGS.max_sequence_len) + "/"
    x_train_tensor = np.load(our_dir + "train_des.npy")
    x_dev_tensor = np.load(our_dir + "dev_des.npy")
    x_test_tensor = np.load(our_dir + "test_des.npy")

    # our_dir = "./Length" + str(FLAGS.max_sequence_len2) + "/"
    x_train_tensor_o = np.load(our_dir + "train_opr.npy")
    x_dev_tensor_o = np.load(our_dir + "dev_opr.npy")
    x_test_tensor_o = np.load(our_dir + "test_opr.npy")

    x_train_indi_o = 1 - np.load(our_dir + "train_indi_opr.npy")
    x_dev_indi_o = 1 - np.load(our_dir + "dev_indi_opr.npy")
    x_test_indi_o = 1 - np.load(our_dir + "test_indi_opr.npy")

    sep = "\t"
    i1, x1_train, x2_train, x3_train, x4_train, y_train, y2_train = inputH.getTsvTestData_Mul_Labels_Dyna(
        train_f, sep, FLAGS.max_sequence_len)
    i2, x1_dev, x2_dev, x3_dev, x4_dev, y_dev, y2_dev = inputH.getTsvTestData_Mul_Labels_Dyna(
        dev_f, sep, FLAGS.max_sequence_len)
    i3, x1_test, x2_test, x3_test, x4_test, y_test, y2_test = inputH.getTsvTestData_Mul_Labels_Dyna(
        test_f, sep, FLAGS.max_sequence_len)

    des_e_names, des_opr_map = load_coocurrence_matrix("result.txt")
    co_arr_test = get_coocurrence(des_e_names, des_opr_map, x2_test, x4_test)
    co_arr_train = get_coocurrence(des_e_names, des_opr_map, x2_train, x4_train)
    co_arr_val = get_coocurrence(des_e_names, des_opr_map, x2_dev, x4_dev)

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            cnn = MT_Dynamic_MultiGranModel(
                max_len1=FLAGS.max_sequence_len,
                max_len2=FLAGS.max_sequence_len2,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
            )

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            saver = tf.train.Saver(tf.all_variables(), max_to_keep=20)

            # Keep track of gradient values and sparsity (optional)
            for g, v in grads_and_vars:
                if g is not None:
                    tf.summary.histogram("grad_hist/{}".format(v.name), g)
                    tf.summary.scalar("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g))
                    tf.summary.histogram(v.name, v)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(FLAGS.train_dir, "runs", "multitask" + timestamp))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            print("Writing to {}\n".format(out_dir))

            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary1 = tf.summary.scalar("accuracy1", cnn.accuracy_d)
            acc_summary2 = tf.summary.scalar("accuracy2", cnn.accuracy_o)

            # Train Summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary1, acc_summary2])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Initialize all variables
            sess.run(tf.initialize_all_variables())

            def train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)
                feed_dict = {
                    cnn.input_tensor: x_batch,
                    cnn.input_y_description: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    cnn.input_tensor_o: x_batch2,
                    cnn.input_y_operation: y_batch2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }
                _, step, summaries, loss, accuracy1, accuracy2 = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if step % 10 == 0:
                    print("{}: step {}, loss {:g}, acc1 {:g}, acc2 {:g}".format(time_str, step, loss, accuracy1, accuracy2))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_dev, y_batch_dev, x_dev2, y_batch_dev2, indi, co_arr, writer=None):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }
                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o],
                    feed_dict)
                if writer:
                    writer.add_summary(summaries, step)
                return loss, accuracy1, accuracy2

            def evaluate(x_dev, y_batch_dev, x_dev2, y_batch_dev2, indi, co_arr):
                gamma = [0.5 if i == 1 else 1.0 for i in indi]
                gamma = np.asarray(gamma)
                feed_dict = {
                    cnn.input_tensor: x_dev,
                    cnn.input_y_description: y_batch_dev,
                    cnn.dropout_keep_prob: 1.0,
                    cnn.input_y_operation: y_batch_dev2,
                    cnn.input_tensor_o: x_dev2,
                    cnn.mask_opr: np.asarray(indi, dtype=float),
                    cnn.gamma: gamma,
                    cnn.matrix: co_arr
                }
                step, summaries, loss, accuracy1, accuracy2, pres1, pres2 = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy_d, cnn.accuracy_o, cnn.scores_d, cnn.scores_o],
                    feed_dict)

                eval_file = open(out_dir + "/evaluation.txt", "w+")
                right_file = open(out_dir + "/right_cases.txt", "w+")
                right_file2 = open(out_dir + "/right_cases_operation.txt", "w+")
                wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
                wrong_file2 = open(out_dir + "/wrong_cases_operation.txt", "w+")

                eval_file.write("Accu1: " + str(accuracy1) + "\n")
                eval_file.write("Accu2: " + str(accuracy2) + "\n")

                predictions1 = np.argmax(pres1, 1)
                predictions2 = np.argmax(pres2, 1)
                labels1 = np.argmax(y_batch_dev, 1)
                labels2 = np.argmax(y_batch_dev2, 1)

                def process(indi, tensor):
                    tmp = []
                    ll = len(indi)
                    for i in range(ll):
                        if indi[i] == 0:
                            tmp.append(tensor[i])
                    return np.asarray(tmp)

                write_evaluation_file(eval_file, right_file, wrong_file, labels1, predictions1, x1_test, x2_test)
                write_evaluation_file(eval_file, right_file2, wrong_file2, labels2, predictions2, x3_test, x4_test, indi)

                eval_file.write("Parameters:")
                for attr, value in sorted(FLAGS.__flags.items()):
                    eval_file.write("{}={}".format(attr.upper(), value) + "\n")
                return loss, accuracy1, accuracy2

            def dev_whole(x_dev, y_dev, x_dev2, y_dev2, indi, co_dev_arr, writer=None):
                batches_dev = inputH.batch_iter(list(zip(x_dev, y_dev, co_dev_arr)), FLAGS.batch_size, 1, shuffle=False)
                batches_dev2 = inputH.batch_iter(list(zip(x_dev2, y_dev2, indi)), FLAGS.batch_size, 1, shuffle=False)
                losses = []
                accuracies1 = []
                accuracies2 = []

                batches = zip(batches_dev, batches_dev2)
                for batches_dev, batches_dev2 in batches:
                    x_batch, y_batch, co_arr = zip(*batches_dev)
                    x_batch2, y_batch2, indi = zip(*batches_dev2)
                    loss, accuracy1, accuracy2 = dev_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr, writer)
                    losses.append(loss)
                    accuracies1.append(accuracy1)
                    accuracies2.append(accuracy2)
                return np.mean(np.array(losses)), np.mean(np.array(accuracies1)), np.mean(np.array(accuracies2))

            def overfit(dev_loss):
                # Early-stopping check: returns True when the tracked metric has not improved over
                # the last few evaluations.
                n = len(dev_loss)
                if n < 5:
                    return False
                for i in xrange(n - 4, n):
                    if dev_loss[i] > dev_loss[i - 1]:
                        return False
                return True

            # Generate batches
            batches = inputH.batch_iter(
                list(zip(x_train_tensor, y_train, x_train_tensor_o, y2_train, x_train_indi_o, co_arr_train)),
                FLAGS.batch_size, FLAGS.num_epochs)

            # Training loop. For each batch...
            dev_loss = []
            dev_loss2 = []
            # batch_d_o = zip(batches, batches2)
            for batch in batches:
                x_batch, y_batch, x_batch2, y_batch2, indi, co_arr = zip(*batch)
                train_step(x_batch, y_batch, x_batch2, y_batch2, indi, co_arr)
                current_step = tf.train.global_step(sess, global_step)

                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    loss, accuracy1, accuracy2 = dev_whole(x_dev_tensor, y_dev, x_dev_tensor_o, y2_dev,
                                                           x_dev_indi_o, co_arr_val, writer=dev_summary_writer)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: dev-aver, loss {:g}, acc {:g}, acc2 {:g}".format(time_str, loss, accuracy1, accuracy2))
                    dev_loss.append(accuracy1)
                    dev_loss2.append(accuracy2)
                    print("\nRecently accuracy:")
                    print(dev_loss[-10:])
                    print(dev_loss2[-10:])
                    if overfit(dev_loss):
                        print('Overfit!! in task1')
                        break
                    if overfit(dev_loss2):
                        print('Overfit!! in task2')
                        break
                    print("")

                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

            loss, accuracy1, accuracy2 = evaluate(x_test_tensor, y_test, x_test_tensor_o, y2_test,
                                                  x_test_indi_o, co_arr_test)
            print(loss)
            print(accuracy1)
            print(accuracy2)
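# A minimal sketch of the per-example weight vector built in train_step/dev_step above (NumPy only;
# the toy indicator values are illustrative assumptions, and how the model actually consumes
# cnn.gamma and cnn.mask_opr is defined in MT_Dynamic_MultiGranModel, which is not shown here):
import numpy as np

indi = [0, 1, 0, 1]  # example indicator vector as passed into train_step
gamma = np.asarray([0.5 if i == 1 else 1.0 for i in indi])
# gamma -> array([1. , 0.5, 1. , 0.5]): examples flagged with 1 get half weight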
def __init__(self):
    # the max length of description/operation segment, padding if shorter, and ignore the pair if longer
    max_document_length = 20
    inpH = InputHelper()
    y_is_value = True  # flag: y is a scalar value (0/1) rather than a one-hot array ([0, 1] / [1, 0])

    # train_set, dev_set, vocab_processor, sum_no_of_batches = inpH.getDataSets_File(FLAGS.training_files, "\t",
    #                                                                                max_document_length,
    #                                                                                10,  # 10---percent_dev
    #                                                                                FLAGS.batch_size, y_value=y_is_value)
    # test_x1, test_x2, test_y = inpH.getTestDataSet(FLAGS.test_file, "\t\t", vocab_processor, max_document_length, y_is_value)

    task_num = 2
    d_type = "Description" if task_num == 1 else "Operation"
    embedding_dir = "../data/word2vec/character_model.txt" if task_num == 1 \
        else "../data/operation/character_model.txt"
    name = "des" if task_num == 1 else "opr"

    time_gen = "0823"
    data_file = os.path.join(FLAGS.train_dir, "data/train_data_" + time_gen + "_" + name + ".txt")
    data_file_test = os.path.join(FLAGS.train_dir, "data/test_data_" + time_gen + "_" + name + ".txt")
    # data_file_val = os.path.join(FLAGS.train_dir, "data/validation_data_" + time_gen + "_" + name + ".txt")

    sep = "\t"
    train_x1, train_x2, train_y = inpH.getTsvTestData(data_file, sep, max_document_length, y_is_value)
    test_x1, test_x2, test_y = inpH.getTsvTestData(data_file_test, sep, max_document_length, y_is_value)
    # dev_x1, dev_x2, dev_y = inpH.getTsvTestData(data_file_val, sep, max_document_length, y_is_value)
    dev_x1, dev_x2, dev_y = test_x1, test_x2, test_y

    sum_no_of_batches = len(train_y) // FLAGS.batch_size

    vocab_processor = MyVocabularyProcessor(max_document_length, min_frequency=0)
    vocab_processor.fit_transform(np.concatenate((train_x1, train_x2, dev_x1, dev_x2)))
    # vocab_processor.fit_transform(np.concatenate((train_x1, train_x2, test_x1, test_x2, dev_x1, dev_x2)))
    print("Length of loaded vocabulary ={}".format(len(vocab_processor.vocabulary_)))

    train_set = inpH.get_data(vocab_processor, train_x1, train_x2, train_y, max_document_length)
    dev_set = inpH.get_data(vocab_processor, dev_x1, dev_x2, dev_y, max_document_length)
    test_set = inpH.get_data(vocab_processor, test_x1, test_x2, test_y, max_document_length)

    # load in word2vec model
    embedding_matrix = self.getEmbeddingMatrix(embedding_dir, vocab_processor)

    sess = tf.Session()
    with sess.as_default():
        siameseModel = SiameseLSTM(sequence_length=max_document_length,
                                   vocab_processer=vocab_processor,
                                   embedding_size=FLAGS.embedding_dim,
                                   hidden_unit_size=FLAGS.hidden_units,
                                   l2_reg_lambda=FLAGS.l2_reg_lambda,
                                   batch_size=FLAGS.batch_size,
                                   embedding_arr=embedding_matrix)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        print("initialized siameseModel object")

        grads_and_vars = optimizer.compute_gradients(siameseModel.loss)
        tr_op_set = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        print("defined training_ops")

        # Keep track of variables, gradient values and sparsity
        for g, v in grads_and_vars:
            if g is not None:
                tf.summary.histogram("grad_hist/{}".format(v.name), g)
                tf.summary.histogram("grad_sparsity/{}".format(v.name), tf.nn.zero_fraction(g))
                tf.summary.histogram(v.name, v)
        print("defined gradient summaries")

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "Exp" + time_gen, "runs", d_type + timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", siameseModel.loss)
        acc_summary = tf.summary.scalar("accuracy", siameseModel.accuracy)
tf.summary.scalar("accuracy", siameseModel.accuracy) # Train Summaries train_summary_merged = tf.summary.merge_all() train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter( train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath( os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.all_variables(), max_to_keep=100) # Write vocabulary vocab_processor.save(os.path.join(checkpoint_dir, "vocab")) # Initialize all variables sess.run(tf.initialize_all_variables()) print("init all variables") graph_def = tf.get_default_graph().as_graph_def() graphpb_txt = str(graph_def) with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f: f.write(graphpb_txt) def train_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch): feed_dict = { siameseModel.input_x1: x1_batch, siameseModel.input_x2: x2_batch, siameseModel.mask_x1: x1_batch_m, siameseModel.mask_x2: x2_batch_m, siameseModel.input_y: y_batch, } _, step, summaries, loss, accuracy, dist = sess.run([ tr_op_set, global_step, train_summary_merged, siameseModel.loss, siameseModel.accuracy, siameseModel.distance ], feed_dict) time_str = datetime.datetime.now().isoformat() d = np.copy(dist) d[d >= 0.5] = 999.0 d[d < 0.5] = 1 d[d > 1.0] = 0 accuracy_t = np.mean(y_batch == d) print("TRAIN {}: step {}, loss {:g}, acc {:g}, acc_t {:g}". 
            print(y_batch)
            print(dist)
            print(d)
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch):
            feed_dict = {
                siameseModel.input_x1: x1_batch,
                siameseModel.input_x2: x2_batch,
                siameseModel.mask_x1: x1_batch_m,
                siameseModel.mask_x2: x2_batch_m,
                siameseModel.input_y: y_batch,
            }
            step, summaries, loss, accuracy, dist = sess.run(
                [global_step, dev_summary_op, siameseModel.loss, siameseModel.accuracy, siameseModel.distance],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()

            # Same distance-to-label thresholding as in train_step
            d = np.copy(dist)
            d[d >= 0.5] = 999.0
            d[d < 0.5] = 1
            d[d > 1.0] = 0
            accuracy_t = np.mean(y_batch == d)

            print("DEV {}: step {}, loss {:g}, acc {:g}, acc_t {:g}".format(time_str, step, loss, accuracy, accuracy_t))
            print(y_batch)
            print(dist)
            print(d)
            dev_summary_writer.add_summary(summaries, step)
            return accuracy

        def overfit(dev_loss, accu):
            # Early-stopping check: returns True only when `accu` is not higher than any of the
            # last `num` recorded accuracies.
            num = 6
            n = len(dev_loss)
            if n < num:
                return False
            for i in xrange(n - num, n):
                if dev_loss[i] < accu:
                    return False
            print(dev_loss)
            print(accu)
            return True

        def evaluate(x1_batch, x2_batch, x1_batch_m, x2_batch_m, y_batch, mention, entity):
            feed_dict = {
                siameseModel.input_x1: x1_batch,
                siameseModel.input_x2: x2_batch,
                siameseModel.mask_x1: x1_batch_m,
                siameseModel.mask_x2: x2_batch_m,
                siameseModel.input_y: y_batch,
            }
            loss, accuracy, dist = sess.run(
                [siameseModel.loss, siameseModel.accuracy, siameseModel.distance], feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("Test {}: loss {:g}, acc {:g}".format(time_str, loss, accuracy))
            print(dist)

            eval_file = open(out_dir + "/evaluation.txt", "w+")
            right_file = open(out_dir + "/right_cases.txt", "w+")
            wrong_file = open(out_dir + "/wrong_cases.txt", "w+")
            eval_file.write("Accu: " + str(accuracy) + "\n")
            eval_file.write("Dataset: " + data_file + "\n")
            eval_file.write("Early Stopped at: " + str(stop_p) + "\n")

            d = np.copy(dist)
            d[d >= 0.5] = 999.0
            d[d < 0.5] = 1
            d[d > 1.0] = 0
            predictions = d
            write_evaluation_file(eval_file, right_file, wrong_file, y_batch, predictions, mention, entity)
            return accuracy

        # Generate batches
        batches = inpH.batch_iter(
            list(zip(train_set[0], train_set[1], train_set[2], train_set[3], train_set[4])),
            FLAGS.batch_size, FLAGS.num_epochs)

        max_validation_acc = 0.0
        num_batches_per_epoch = int(len(train_set[0]) / FLAGS.batch_size)
        print(num_batches_per_epoch)
        max_accu = 0
        dev_accu = []
        stop_p = 0  # epoch at which early stopping fires; stays 0 if training runs to completion

        for nn in xrange(sum_no_of_batches * FLAGS.num_epochs):
            batch = batches.next()
            if len(batch) < 1:
                continue
            x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch = zip(*batch)
            if len(y_batch) < 1:
                continue
            train_step(x1_batch, x2_batch, x1_batch_m, x2_match_m, y_batch)
            current_step = tf.train.global_step(sess, global_step)  # get the global step

            sum_acc = 0.0
            tmp = []
            if current_step % num_batches_per_epoch == 0:
                print("\nEvaluation:")
                # dev_batches = inpH.batch_iter(list(zip(dev_set[0], dev_set[1], dev_set[2], dev_set[3], dev_set[4])),
                #                               FLAGS.batch_size, 1)
                # for db in dev_batches:
                #     if len(db) < 1:
                #         continue
                #     x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b = zip(*db)
                #     if len(y_dev_b) < 1:
                #         continue
                #     acc = dev_step(x1_dev_b, x2_dev_b, x1_dev_m, x2_dev_m, y_dev_b)
                #     sum_acc = sum_acc + acc
                #     tmp.append(acc)
                # acc_mean = np.mean(tmp)
                acc_mean = dev_step(dev_set[0], dev_set[1], dev_set[2], dev_set[3], dev_set[4])
                dev_accu.append(acc_mean)
                if overfit(dev_accu, acc_mean):
                    print('Overfit!!')
print("Optimum" + str(max_accu)) print(current_step) stop_p = current_step / num_batches_per_epoch print(stop_p) break if acc_mean >= max_accu: max_accu = acc_mean saver.save( sess, checkpoint_prefix, global_step=current_step) # save checkpoints tf.train.write_graph(sess.graph.as_graph_def(), checkpoint_prefix, "graph" + str(nn) + ".pb", as_text=False) # save graph_def print( "Saved model {} with sum_accuracy={} checkpoint to {}\n" .format(nn, max_validation_acc, checkpoint_prefix)) evaluate(test_set[0], test_set[1], test_set[2], test_set[3], test_set[4], test_x1, test_x2)
import tensorflow as tf
import os
import numpy as np

from DLDisambiguation.util.input_helpers import InputHelper  # needed for InputHelper() below


def writeToFile(m, file):
    l = len(m)
    for i in range(l):
        l_col = len(m[i])
        file.write("[")
        for j in range(l_col):
            file.write(str(m[i][j]) + " ")
        file.write("]\n")
    file.write("\n")


inpH = InputHelper()
max_document_length = 10
y_is_value = True


def get_data(vocab_processor, inpH, train_x1, train_x2, train_y, max_document_length):
    train_x1_i = np.asarray(list(vocab_processor.transform(train_x1)))
    train_x2_i = np.asarray(list(vocab_processor.transform(train_x2)))
    mask_train_x1 = np.zeros([len(train_x1_i), max_document_length])
    mask_train_x2 = np.zeros([len(train_x2_i), max_document_length])
    new_mask_x1, new_mask_x2 = inpH.padding_and_generate_mask(train_x1, train_x2, mask_train_x1, mask_train_x2)
    return (train_x1_i, train_x2_i, new_mask_x1, new_mask_x2, train_y)
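# A minimal usage sketch of writeToFile above (the matrix values and output file name are
# illustrative assumptions):
m = [[0.12, 0.34], [0.56, 0.78]]
with open("matrix_dump.txt", "w") as f:
    writeToFile(m, f)  # writes one "[ ... ]" line per row, then a trailing blank line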