def predict():
    """Interactively classify sentences typed on stdin with the saved TextRNN.

    Loads the pickled ``word_to_id`` / ``cat_to_id`` mappings, sequence length
    and class count from ``map_path``, rebuilds the model with that class
    count and vocabulary size, restores its weights from ``save_path`` and
    then prompts for sentences in a loop, printing the predicted category for
    each one.

    The loop ends when input is exhausted (EOF) or the user presses Ctrl-C at
    the prompt; the TensorFlow session is closed on every exit path.
    """
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # map_path must only ever point at a file written by this project.
    with open(map_path, "rb") as f:
        word_to_id, cat_to_id, seq_length, num_classes = pickle.load(f)

    # Reverse mapping: predicted class id -> category label.
    id_to_cat = {v: k for k, v in cat_to_id.items()}

    config = TRNNConfig()
    config.num_classes = num_classes
    config.vocab_size = len(word_to_id)
    model = TextRNN(config)

    session = tf.Session()
    try:
        session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=session, save_path=save_path)  # load the saved model

        while True:
            try:
                line = input("请输入测试句子:")
            except (EOFError, KeyboardInterrupt):
                # No more input (piped stdin finished, Ctrl-D or Ctrl-C):
                # leave the loop instead of dying with a traceback.
                break

            # Items not present in the vocabulary are silently dropped.
            data_id = [[
                word_to_id[x] for x in native_content(line) if x in word_to_id
            ]]
            x_pad = kr.preprocessing.sequence.pad_sequences(data_id, seq_length)

            # keep_prob is fed as 1.0 — presumably disables dropout at
            # inference time; confirm against the TextRNN definition.
            y_pred_cls = session.run(model.y_pred_cls,
                                     feed_dict={
                                         model.input_x: x_pad,
                                         model.keep_prob: 1.0
                                     })
            print('sentence : {}, prdict intent : {}'.format(
                line, id_to_cat[y_pred_cls[0]]))
    finally:
        # Release the session's resources however the loop was left.
        session.close()


# NOTE(review): the original formatting of this excerpt was lost, so it is not
# clear whether this assignment belonged to predict() or to the module. It is
# kept at module level so no name disappears — confirm it is still needed.
a = 1
data_dir = sys.argv[4] base_dir = 'data/' + data_dir + '/' + t_name classes = sys.argv[5].split('-') train_dir = os.path.join(base_dir, 'train.csv') test_dir = os.path.join(base_dir, 'test.csv') val_dir = os.path.join(base_dir, 'dev.csv') vocab_dir = os.path.join('data/data_orginal/'+t_name, 'vocab.csv') if not os.path.exists(vocab_dir): # 如果不存在词汇表,重建 print(' vocab_dir not exists: ',vocab_dir) build_vocab('data/data_orginal/'+t_name+'/whole.csv', vocab_dir, config.vocab_size) categories, cat_to_id = read_category(classes) words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) config.num_classes = len(classes) mode_name = 'textrnn' save_dir = 'checkpoints/' + t_name + '/' + mode_name + '_' + t_name + "_" + data_dir + '_' + t_th + 'th' print('save_dir:', save_dir) save_path = os.path.join(save_dir, 'best_validation') # 最佳验证结果保存路径 model = TextRNN(config) if sys.argv[1] == 'train': train() else: test() else: print('usage: parameters are less than 4: python file, number of running the task, task name')
predict() exit() # if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']: # raise ValueError("""usage: python run_rnn.py [train / test]""") d = 'data/cnews/' train_dir = d + 'sen_class.train' test_dir = d + 'sen_class.test' val_dir = d + 'sen_class.val' vocab_dir = d + 'sen_class.vocab' print('Configuring RNN model...') config = TRNNConfig() labels = build_vocab(train_dir, test_dir, val_dir, vocab_dir, config.vocab_size) config.num_classes = len(labels) categories, cat_to_id = read_category(labels) words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) model = TextRNN(config) print('labels: {}, vocabulary size: {}'.format(config.num_classes, len(words))) with open(map_path, "wb") as f: pickle.dump( [word_to_id, cat_to_id, config.seq_length, config.num_classes], f) train() exit() if sys.argv[1] == 'train': train()
# 混淆矩阵 print("Confusion Matrix...") cm = metrics.confusion_matrix(y_test_cls, y_pred_cls) print(cm) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) if __name__ == '__main__': if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']: raise ValueError("""usage: python run_rnn.py [train / test]""") print('Configuring RNN model...') config = TRNNConfig() if not os.path.exists(vocab_dir): # 如果不存在词汇表,重建 build_vocab(train_dir, vocab_dir, config.vocab_size) categories, cat_to_id = read_category(problem) print('categories:', categories) print('cat_to_id:', cat_to_id) words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) config.num_classes = len(categories) model = TextRNN(config) if sys.argv[1] == 'train': train() else: test()