def test(self, sess, x, y):
    batch_test = batch_iter(x, y, batch_size=pm.batch_size)
    for x_batch, y_batch in batch_test:
        real_seq_len = seq_length(x_batch)
        feed_dict = self.feed_data(x_batch, y_batch, real_seq_len, 1.0)
        test_loss, test_accuracy = sess.run([self.loss, self.accuracy], feed_dict=feed_dict)
    # Note: only the metrics of the last batch are returned.
    return test_loss, test_accuracy
def test(self, sess, x, y):
    batch_test = batch_iter(x, y, batch_size=pm.batch_size)
    for x_batch, y_batch in batch_test:
        x_batch, seq_length_x = process(x_batch)
        y_batch, seq_length_y = process(y_batch)
        feed_dict = self.feed_data(x_batch, y_batch, seq_length_x, 1.0)
        loss = sess.run(self.loss, feed_dict=feed_dict)
    # Note: only the loss of the last batch is returned.
    return loss
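# Both test() variants above depend on helpers defined elsewhere in the repo
# (batch_iter, seq_length). The bodies below are a minimal sketch of what
# they are assumed to do, not the repo's actual implementation:
import random

def batch_iter(x, y, batch_size=64):
    """Yield shuffled (x, y) mini-batches."""
    pairs = list(zip(x, y))
    random.shuffle(pairs)
    for start in range(0, len(pairs), batch_size):
        xs, ys = zip(*pairs[start:start + batch_size])
        yield list(xs), list(ys)

def seq_length(x_batch):
    """Real (unpadded) length of each id sequence, assuming 0 is the padding id."""
    return [sum(1 for token in row if token != 0) for row in x_batch]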
def train(): """使用tensorboard创建视图""" tensorboard_dir = './tensorboard/Lstm_CNN' save_dir = './checkpoints/Lstm_CNN' if not os.path.exists(os.path.join(tensorboard_dir, 'train')): os.makedirs(os.path.join(tensorboard_dir, 'train')) if not os.path.exists(os.path.join(tensorboard_dir, 'test')): os.makedirs(os.path.join(tensorboard_dir, 'test')) if not os.path.exists(save_dir): os.makedirs(save_dir) save_path = os.path.join(save_dir, 'best_validation') # 在想要的节点下标注总结指令;例如记录标量: tf.summary.scalar('loss', model.loss) tf.summary.scalar('accuracy', model.accuracy) # 将所有的想要的节点信息通过tf.summary.merge_all()打包为一个节点,这里命名为summary__merge_op,随后创建一个写入器,为后续的写入磁盘创建接口; merged_summary = tf.summary.merge_all() writer_train = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'train')) # 把图保存到一个路径 writer_test = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'test')) saver = tf.train.Saver() session = tf.Session() session.run(tf.global_variables_initializer()) writer_train.add_graph(session.graph) """处理训练集、测试集数据""" x_train, y_train = process(pm.train_filename, wordid, cat_to_id, max_length=300) x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=300) for epoch in range(pm.num_epochs): print('Epoch:', epoch+1) num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1 batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size) for x_batch, y_batch in batch_train: real_seq_len = seq_length(x_batch) # 获取句子真实长度 feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, pm.keep_prob) _, global_step, _summary, train_loss, train_accuracy = session.run([model.optimizer, model.global_step, merged_summary, model.loss, model.accuracy], feed_dict=feed_dict) summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=train_loss)]) writer_train.add_summary(summary, global_step) summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=train_accuracy)]) writer_train.add_summary(summary, global_step) if global_step % 5 == 0: test_loss, test_accuracy = model.test(session, x_test, y_test) print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy, 'test_loss:', test_loss, 'test_accuracy:', test_accuracy) summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=test_accuracy)]) writer_test.add_summary(summary, global_step) summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=test_loss)]) writer_test.add_summary(summary, global_step) if global_step % num_batchs == 0: print('Saving Model...') saver.save(session, save_path, global_step=global_step) pm.learning_rate *= pm.lr_decay
def train():
    tensorboard_dir = './tensorboard/Rnn_Attention'
    save_dir = './checkpoints/Rnn_Attention'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x_train, y_train = process(pm.train_filename, wordid, cat_to_id, max_length=250)
    x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=250)

    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            seq_len = sequence(x_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_len, pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [model.optimizer, model.global_step, merged_summary, model.loss, model.accuracy],
                feed_dict=feed_dict)

            if global_step % 100 == 0:
                test_loss, test_accuracy = model.evaluate(session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss,
                      'train_accuracy:', train_accuracy, 'test_loss:', test_loss,
                      'test_accuracy:', test_accuracy)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        pm.learning_rate *= pm.lr_decay
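# pm.learning_rate *= pm.lr_decay only changes the Python-side value; it
# reaches the optimizer only if the graph reads the rate dynamically. If the
# rate were baked in at graph-build time, the decay would be a no-op. A sketch
# of the placeholder pattern this assumes (names here are illustrative):
learning_rate = tf.placeholder(tf.float32, name='learning_rate')
train_op = tf.train.AdamOptimizer(learning_rate).minimize(model.loss, global_step=model.global_step)
# ...and each training feed_dict then includes {learning_rate: pm.learning_rate}.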
def train():
    tensorboard_dir = './tensorboard/biLstm_crf'
    save_dir = './checkpoints/biLstm_crf'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    content_train, label_train = sequence2id(pm.train)
    content_test, label_test = sequence2id(pm.test)

    for epoch in range(pm.epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(content_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(content_train, label_train)
        for x_batch, y_batch in batch_train:
            x_batch, seq_leng_x = process(x_batch)
            y_batch, seq_leng_y = process(y_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_leng_x, pm.keep_pro)
            _, global_step, loss, train_summary = session.run(
                [model.optimizer, model.global_step, model.loss, merged_summary],
                feed_dict=feed_dict)

            if global_step % 100 == 0:
                test_loss = model.test(session, content_test, label_test)
                print('global_step:', global_step, 'train_loss:', loss, 'test_loss:', test_loss)

            if global_step % (2 * num_batchs) == 0:
                print('Saving Model...')
                saver.save(session, save_path=save_path, global_step=global_step)
        pm.learning_rate *= pm.lr
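# The biLstm_crf loop calls process() on each raw batch; it is assumed to pad
# every id sequence to the batch maximum and return the real lengths. A
# minimal sketch under that assumption (0 as the padding id):
def process(batch):
    max_len = max(len(seq) for seq in batch)
    lengths = [len(seq) for seq in batch]
    padded = [list(seq) + [0] * (max_len - len(seq)) for seq in batch]
    return padded, lengths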
def val():
    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Lstm_CNN')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=pm.seq_length)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        real_seq_len = seq_length(x_batch)
        feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, 1.0)
        pre_lab = session.run(model.predict, feed_dict=feed_dict)
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
def val():
    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Rnn_Attention')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=250)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        seq_len = sequence(x_batch)
        pre_lab = session.run(model.predict,
                              feed_dict={model.input_x: x_batch,
                                         model.seq_length: seq_len,
                                         model.keep_pro: 1.0})
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
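# Both val() functions return predicted class ids alongside the original
# (typically one-hot) label rows. A sketch of scoring them, assuming one-hot
# labels and integer predictions:
import numpy as np
from sklearn.metrics import classification_report

pre_label, label = val()
true_label = np.argmax(np.asarray(label), axis=1)  # one-hot rows -> class ids
print(classification_report(true_label, pre_label))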
def train():
    tensorboard_dir = './tensorboard/Seq2Seq'
    save_dir = './checkpoints/Seq2Seq'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x, y = label2id(pm.train_data)
    # NOTE: these slices overlap, so examples 50001-80000 appear in both sets.
    x_train, y_train = x[1:100001], y[1:100001]
    x_test, y_test = x[50001:80000], y[50001:80000]

    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = model.feed_data(x_batch, y_batch, pm.keep_pro)
            _, global_step, _summary, train_loss = session.run(
                [model.optimizer, model.global_step, merged_summary, model.loss],
                feed_dict=feed_dict)

            if global_step % 100 == 0:
                test_loss = model.test(session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss, 'test_loss:', test_loss)

            if global_step % (3 * num_batchs) == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        pm.learning_rate *= pm.lr
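# Because the train and test slices above overlap, part of the test set is
# also trained on. A cleaner variant would split the data disjointly, e.g.:
split = int(0.8 * len(x))
x_train, y_train = x[:split], y[:split]
x_test, y_test = x[split:], y[split:]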
def train(x_word_train, y_word_train, x_word_test, y_word_test,
          x_selfentity_train, y_selfentity_train, x_selfentity_test, y_selfentity_test,
          x_fatherentity_train, y_fatherentity_train, x_fatherentity_test, y_fatherentity_test,
          x_cnn_train, y_cnn_train, x_cnn_test, y_cnn_test):
    tensorboard_dir = './tensorboard/Rnn_Attention'
    save_dir = './checkpoints/Rnn_Attention'
    if not os.path.exists(tensorboard_dir):  # just creates the directory
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    T_loss = []
    L_loss = []
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(x_word_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_word_train, y_word_train,
                                 x_selfentity_train, y_selfentity_train,
                                 x_fatherentity_train, y_fatherentity_train,
                                 x_cnn_train, y_cnn_train,
                                 batch_size=pm.batch_size)
        # Pitfall: the loop variables here must not reuse the names passed to
        # batch_iter above, or they would shadow the full training arrays.
        for x1, y1, x2, y2, x3, y3, x4, y4 in batch_train:
            word_seq_len = sequence(x1)
            selfentity_seq_len = sequence(x2)
            fatherentity_seq_len = sequence(x3)
            cnn_inter_seq_len = sequence(x4)
            feed_dict = model.feed_data(x1, y1, word_seq_len,
                                        x2, y2, selfentity_seq_len,
                                        x3, y3, fatherentity_seq_len,
                                        x4, y4, cnn_inter_seq_len,
                                        pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [model.optimizer, model.global_step, merged_summary, model.loss, model.accuracy],
                feed_dict=feed_dict)
            # print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy)

            if global_step % 50 == 0:
                test_loss, test_accuracy = model.evaluate(
                    session, x_word_test, y_word_test,
                    x_selfentity_test, y_selfentity_test,
                    x_fatherentity_test, y_fatherentity_test,
                    x_cnn_test, y_cnn_test)
                print('global_step:', global_step, 'train_loss:', train_loss,
                      'train_accuracy:', train_accuracy, 'test_loss:', test_loss,
                      'test_accuracy:', test_accuracy)
                T_loss.append(train_loss)
                L_loss.append(test_loss)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        # pm.learning_rate *= pm.lr_decay

    x = range(len(T_loss))
    plt.plot(x, T_loss)         # blue: training loss
    plt.plot(x, L_loss, "r--")  # red: test loss
    plt.show()
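# The final plot draws the two loss curves without labels or axis names; a
# legend makes it self-describing (T_loss/L_loss are sampled every 50 steps):
plt.plot(x, T_loss, label='train loss')
plt.plot(x, L_loss, 'r--', label='test loss')
plt.xlabel('evaluation step (every 50 global steps)')
plt.ylabel('loss')
plt.legend()
plt.show()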
def test(self, sess, x, y):
    batch_test = batch_iter(x, y, batch_size=pm.batch_size)
    for x_batch, y_batch in batch_test:
        feed_dict = self.feed_data(x_batch, y_batch, 1.0)
        test_loss = sess.run(self.loss, feed_dict=feed_dict)
    return test_loss
# ## Train and Evaluate Model
learning_rate = 0.001
num_epochs = 30
batch_size = 100

# ### Human Results
vocab_size = 23  # number of words in the base vocabulary
data_size = len(human_train_tensors)
num_labels = human_GO_terms.shape[0]

lstm = LSTM(vocab_size, emb_dim, hidden_dim, num_labels, batch_size)
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate, weight_decay=L2_penalty)

data_iter = dp.batch_iter(batch_size, human_train_tensors, human_train_labels, human_train_lengths)
dev_batches = dp.eval_iter(batch_size, human_valid_tensors, human_valid_labels)

if torch.cuda.is_available():
    lstm = lstm.cuda()
    criterion = criterion.cuda()

# Model Training
organism = 'Human'
train_test(num_epochs, optimizer, data_iter, dev_batches, lstm, data_size, output_file)
# ## Train and Evaluate Model
learning_rate = 0.001
num_epochs = 30
batch_size = 100

# ### Yeast Results
vocab_size = 21  # number of words in the base vocabulary
data_size = len(yeast_train_tensors)
num_labels = yeast_GO_terms.shape[0]

lstm = LSTM(vocab_size, emb_dim, hidden_dim, num_labels, batch_size)
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate, weight_decay=L2_penalty)

data_iter = dp.batch_iter(batch_size, yeast_train_tensors, yeast_train_labels, yeast_train_lengths)
dev_batches = dp.eval_iter(batch_size, yeast_valid_tensors, yeast_valid_labels)

if torch.cuda.is_available():
    lstm = lstm.cuda()
    criterion = criterion.cuda()

# Model Training
organism = 'Yeast'
train_test(num_epochs, optimizer, data_iter, dev_batches, lstm, data_size, output_file)
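# Both runs delegate to train_test(), defined elsewhere in the repo. Below is
# a minimal sketch of the loop its signature suggests; what data_iter yields
# and how the model is called are assumptions, and criterion/batch_size come
# from the setup above:
def train_test(num_epochs, optimizer, data_iter, dev_batches, model, data_size, output_file):
    batches_per_epoch = data_size // batch_size
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for _ in range(batches_per_epoch):
            vectors, labels, lengths = next(data_iter)  # assumed iterator contract
            optimizer.zero_grad()
            output = model(vectors, lengths)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print('Epoch %d: mean train loss %.4f'
              % (epoch + 1, total_loss / batches_per_epoch), file=output_file)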
def val(): """----word level----""" all_word_file = "./word/all_clean.txt" train_word_file = "./word/train_clean.txt" test_word_file = "./word/test_clean.txt" """----self entity level----""" all_selfentity_file = "./self_entity/all_good_selfentity_last.txt" train_selfentity_file = "./self_entity/train_good_selfentity_last.txt" test_selfentity_file = "./self_entity/test_good_selfentity_last.txt" """----father entity level----""" all_fatherentity_file = "./father_entity/all_good_fatherentity_last.txt" train_fatherentity_file = "./father_entity/train_good_fatherentity_last.txt" test_fatherentity_file = "./father_entity/test_good_fatherentity_last.txt" """---------CNN--interaction----------""" all_cnn_file = "./interaction_data/all_label.txt" train_cnn_file = "./interaction_data/train_label.txt" test_cnn_file = "./interaction_data/test_label.txt" pre_label = [] label = [] session = tf.Session() session.run(tf.global_variables_initializer()) save_path = tf.train.latest_checkpoint('./checkpoints/Rnn_Attention') saver = tf.train.Saver() saver.restore(sess=session, save_path=save_path) x_word_train, y_word_train, vocab_word_processor, x_word_test, y_word_test, \ x_selfentity_train, y_selfentity_train, vocab_selfentity_processor, x_selfentity_test, y_selfentity_test, \ x_fatherentity_train, y_fatherentity_train, vocab_fatherentity_processor, x_fatherentity_test, y_fatherentity_test, \ x_cnn_train, y_cnn_train, vocab_cnn_processor, x_cnn_test, y_cnn_test \ = shuffle_data(all_word_file, train_word_file, test_word_file, all_selfentity_file, train_selfentity_file, test_selfentity_file, all_fatherentity_file, train_fatherentity_file, test_fatherentity_file, all_cnn_file, train_cnn_file, test_cnn_file ) batch_val = batch_iter(x_word_test, y_word_test, x_selfentity_test, y_selfentity_test, x_fatherentity_test, y_selfentity_test, x_cnn_test, y_cnn_test, batch_size=64) for x_batch, y_batch,x_selfentity_test, y_selfentity_test,x_fatherentity_test, \ y_selfentity_test, x_cnn_test,y_cnn_test in batch_val: seq_len = sequence(x_batch) selfentity_len = sequence(x_selfentity_test) fatherentity_len = sequence(x_fatherentity_test) cnninter_len = sequence(x_cnn_test) pre_lab = session.run(model.predict, feed_dict={ model.input_word_x1: x_batch, model.input_y1: y_batch, model.input_selfentity_x1: x_selfentity_test, model.input_selfentity_y1: y_selfentity_test, model.input_fatherentity_x1: x_fatherentity_test, model.input_fatherentity_y1: y_selfentity_test, model.input_cnn_x1: x_cnn_test, model.input_cnn_y1: y_cnn_test, model.word_seq_length: seq_len, model.self_entity_seq_length: selfentity_len, model.father_entity_seq_length: fatherentity_len, model.cnn_inter_seq_length: cnninter_len, model.keep_pro: 1.0 }) pre_label.extend(pre_lab) label.extend(y_batch) return pre_label, label