# Training hyper-parameters and data locations for the text classifier.
# Each setting sits on its own line so that a trailing comment cannot
# swallow the statements that come after it.
lr = 0.001  # learning rate
num_epochs = 5  # number of training epochs
batch_size = 64  # mini-batch size
keep_p = 0.5  # keep probability (keep_prob); presumably for dropout - confirm at the call site

train_filename = './data/cnews.train.txt'  # training data
test_filename = './data/cnews.test.txt'  # test data
val_filename = './data/cnews.val.txt'  # validation data
vocab_filename = './data/vocab_word.txt'  # vocabulary
vector_word_filename = './data/vector_word.txt'  # word vectors trained by word2vec
vector_word_npz = './data/vector_word.npz'  # word vectors saved as a NumPy file

# Lookup tables are built once, when this module is executed, by helpers
# that are defined elsewhere (read_category, get_wordid, get_word2vec).
categories, cat_to_id = read_category()
wordid = get_wordid(vocab_filename)
embeddings = get_word2vec(vector_word_npz)

# NOTE(review): the separator and the data_loader class that follow are cut
# off in this view (the second process() call is unterminated), so that text
# is kept exactly as it was instead of being reconstructed.
############################################################ class data_loader(): def __init__(self): self.train_x, self.train_y = process(train_filename, wordid, cat_to_id, max_length=300) self.test_x, self.test_y = process(test_filename, wordid, cat_to_id,
val_x, val_y = process_predict(pm.val_filename, wordid, cat_to_id, max_length=pm.seq_length) batch_val = batch_iter_predict(val_x, val_y, batch_size=64) for x_batch, y_batch in batch_val: real_seq_len = seq_length(x_batch) feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, 1.0) pre_lab = session.run(model.predict, feed_dict=feed_dict) pre_label.extend(pre_lab) return pre_label if __name__ == '__main__': start = time.time() pm = pm categories, cat_to_id = read_category() wordid = get_wordid(pm.vocab_filename) pm.vocab_size = len(wordid) pm.pre_training = get_word2vec(pm.vector_word_npz) model = Lstm_CNN() pre_label = val() contents_list = list(zip(pre_label, [num for item in pm.contents_finally for num in item])) # 在当前目录下创建文件 i, j = 1, 1 finally_data_dir = './data/Finally_Data' if os.path.exists(os.path.join(finally_data_dir, 'No_Value.txt')): os.remove(os.path.join(finally_data_dir, 'No_Value.txt')) if os.path.exists(os.path.join(finally_data_dir, 'Value.txt')): os.remove(os.path.join(finally_data_dir, 'Value.txt')) if not os.path.exists(finally_data_dir):
test_loss, test_accuracy = model.test(session, x_test, y_test) print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy, 'test_loss:', test_loss, 'test_accuracy:', test_accuracy) summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=test_accuracy)]) writer_test.add_summary(summary, global_step) summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=test_loss)]) writer_test.add_summary(summary, global_step) if global_step % num_batchs == 0: print('Saving Model...') saver.save(session, save_path, global_step=global_step) pm.learning_rate *= pm.lr_decay if __name__ == '__main__': # pm = pm start = time.time() filenames = [pm.train_filename, pm.test_filename, pm.val_filename] categories, cat_to_id = read_category() # 给10个类别编号 wordid = get_wordid(pm.vocab_filename) # 给10000个常用词词典单词编号 pm.vocab_size = len(wordid) pm.pre_training = get_word2vec(pm.vector_word_npz) model = Lstm_CNN() train() stop = time.time() print("程序运行时间: ", (stop - start))