# 混淆矩阵 print("Confusion Matrix...") cm = metrics.confusion_matrix(y_test_cls, y_pred_cls) print(cm) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) if __name__ == '__main__': # 输入参数 train 和 test 表示训练与测试 # 需要在命令行运行 python run_cnn.py <train>|<test> if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']: raise ValueError("""usage: python run_cnn.py [train / test]""") print('Configuring CNN model...') config = TCNNConfig() # 获得TCNNConfig设置,TCNNConfig表示CNN配置参数 if not os.path.exists( vocab_dir): # 如果不存在词汇表,重建 单词表长度5000,是train里面出现最频繁的5000个单词 build_vocab(train_dir, vocab_dir, config.vocab_size) categories, cat_to_id = read_category( ) # read_category()获取目录,cat_to_id 标签:序号的字典 words, word_to_id = read_vocab(vocab_dir) # 将词汇表的各个单词编号 config.vocab_size = len(words) # 更新词汇表长度 model = TextCNN(config) # 构建CNN模型,很重要 if sys.argv[1] == 'train': train() else: test()
} y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls, feed_dict=feed_dict) # 评估 print("Precision, Recall and F1-Score...") print(metrics.classification_report(y_test_cls, y_pred_cls, target_names=categories)) # 混淆矩阵 print("Confusion Matrix...") cm = metrics.confusion_matrix(y_test_cls, y_pred_cls) print(cm) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) if __name__ == '__main__': print('Configuring CNN model...') config = TCNNConfig() if not os.path.exists(vocab_dir): build_vocab(train_dir, vocab_dir, config.vocab_size) categories, cat_to_id = read_category() words, word_to_id = read_vocab(vocab_dir) config.vocab_size = len(words) model = TextCNN(config) if sys.argv[1] == 'train': train() else: test()
# --- build the embedding matrix from pretrained vectors, then train/evaluate ---
# NOTE(review): script fragment; `cnn`, `count`, `sub_embeddings`,
# `word_vector_map`, `missing_words_file`, `test_data_Y`, `test_docs` and `doc`
# are defined outside the visible range.

# Copy the pretrained vector for every vocabulary word that has one; count and
# log the words with no pretrained embedding (their rows keep prior contents).
# FIX: dropped the no-op `count = count` from the hit branch of the original.
for i in range(cnn.config.vocab_size):
    if cnn.words[i] in word_vector_map:
        sub_embeddings[i] = word_vector_map.get(cnn.words[i])
    else:
        count += 1
        missing_words_file.write(cnn.words[i] + '\n')

# Fraction of vocabulary words lacking a pretrained embedding.
print('no embedding: ' + str(1.0 * count/len(cnn.words)))
# Embedding matrix dimensions: vocab size x vector length.
print(str(len(sub_embeddings)) + '\t' + str(len(sub_embeddings[0])))
missing_words_file.close()
print(sub_embeddings[0])

cnn.embedding_matrix = sub_embeddings
cnn.model = TextCNN(cnn.config)
cnn.train()
predict_y = cnn.test()  # predicted class ids for the test set
print(predict_y)
print(len(predict_y))
print(len(test_data_Y))
tf.reset_default_graph()

# Score the predictions against the gold labels (category names) and build one
# XML <doc> node per test document, id taken from the docs file's first field.
correct_count = 0
for i in range(len(test_data_Y)):
    if cnn.id_to_cat[predict_y[i]] == test_data_Y[i]:
        correct_count += 1
    # presumably the <doc> node is built for every document, not only correct
    # ones — TODO confirm; the collapsed original makes the nesting ambiguous.
    doc_node = doc.createElement("doc")
    doc_node.setAttribute("id", test_docs[i].split(',')[0])
    # NOTE(review): chunk ends mid-loop; the rest of the body is outside view.