def get_cnn_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model is not found")
    if not os.path.exists(train_data_path):
        print("train params is not found")
    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])
    # Clean the test file and pad every sentence to the training length
    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, embedding_model_path))
    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length
    sess = tf.InteractiveSession()
    cnn = Cnn_Model.TextCNN(config=testconfig)
    # Restore the trained CNN parameters
    saver = tf.train.Saver()
    saver.restore(
        sess, "D:/urun/Comments_Classifiation-master/data/cnn/text_model")

    # Test step: returns the predicted labels and softmax scores;
    # sess.run evaluates the requested tensors under the given feed_dict
    def test_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.dropout_keep_prob: 1.0}
        predictions, scores = sess.run([cnn.predictions, cnn.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    # Collect the results
    predictions, scores = test_step(test_sample_arrays)
    return np.array(predictions)
def get_lstm_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model is not found")
    if not os.path.exists(train_data_path):
        print("train params is not found")
    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])
    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, embedding_model_path))
    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length
    sess = tf.InteractiveSession()
    lstm = Lstm_Model.TextLSTM(config=testconfig)
    saver = tf.train.Saver()
    saver.restore(
        sess, "D:/urun/Comments_Classifiation-master/data/lstm/text_model")

    # Test step: dropout is disabled (keep probability 1.0) at inference time
    def test_step(x_batch):
        feed_dict = {
            lstm.input_x: x_batch,
            lstm.dropout_keep_prob: 1.0
        }
        predictions, scores = sess.run([lstm.predictions, lstm.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    predictions, scores = test_step(test_sample_arrays)
    return np.array(predictions)
def get_mixed_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model is not found")
    if not os.path.exists(lstm_train_data_path):
        print("lstm train params is not found")
    lstm_params = readdata.loadDict(lstm_train_data_path)
    lstm_train_length = int(lstm_params['max_sentences_length'])
    if not os.path.exists(cnn_train_data_path):
        print("cnn train params is not found")
    cnn_params = readdata.loadDict(cnn_train_data_path)
    cnn_train_length = int(cnn_params['max_sentences_length'])
    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    lstm_test_sample_lists, lstm_max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=lstm_train_length)
    cnn_test_sample_lists, cnn_max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=cnn_train_length)
    lstm_test_sample_arrays = np.array(
        word2vec.get_embedding_vector(lstm_test_sample_lists,
                                      embedding_model_path))
    cnn_test_sample_arrays = np.array(
        word2vec.get_embedding_vector(cnn_test_sample_lists,
                                      embedding_model_path))
    lstm_config = lstmconfig()
    cnn_config = cnnconfig()
    lstm_config.max_sentences_length = lstm_max_sentences_length
    cnn_config.max_sentences_length = cnn_max_sentences_length
    # Each model lives in its own graph and session so the two
    # checkpoints can be restored independently
    lstm_graph = tf.Graph()
    cnn_graph = tf.Graph()
    lstm_sess = tf.Session(graph=lstm_graph)
    cnn_sess = tf.Session(graph=cnn_graph)
    with lstm_sess.as_default():
        with lstm_graph.as_default():
            lstm = lstm_model.TextLSTM(config=lstm_config)
            lstm_saver = tf.train.Saver()
            lstm_saver.restore(lstm_sess, "data/lstm/text_model")

            # Test step: returns the LSTM softmax scores
            # (dropout disabled at inference time)
            def lstm_test_step(x_batch):
                feed_dict = {
                    lstm.input_x: x_batch,
                    lstm.dropout_keep_prob: 1.0
                }
                scores = lstm_sess.run([lstm.softmax_result],
                                       feed_dict=feed_dict)
                return scores

            lstm_scores = lstm_test_step(lstm_test_sample_arrays)
    with cnn_sess.as_default():
        with cnn_graph.as_default():
            cnn = cnn_model.TextCNN(config=cnn_config)
            cnn_saver = tf.train.Saver()
            cnn_saver.restore(cnn_sess, "data/cnn/text_model")

            # Test step: returns the CNN softmax scores
            def cnn_test_step(x_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                scores = cnn_sess.run([cnn.softmax_result],
                                      feed_dict=feed_dict)
                return scores

            cnn_scores = cnn_test_step(cnn_test_sample_arrays)
    lstm_sess.close()
    cnn_sess.close()
    # Fuse the two models: sum their softmax scores and take the argmax.
    # Each score list holds one [batch, num_labels] array, so the summed
    # tensor has shape (1, batch, num_labels), hence axis=2.
    mixed_scores = np.sum([lstm_scores, cnn_scores], axis=0)
    predictions = np.argmax(mixed_scores, axis=2)
    return np.array(predictions)
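# A minimal, self-contained sketch of the fusion step above. The shapes are an
# assumption read off the code: each sess.run([...softmax_result]) returns a
# list holding one [batch, num_labels] array, so the summed scores have shape
# (1, batch, num_labels) and the argmax runs over axis=2. The values below are
# mock data, not real model outputs.
import numpy as np

mock_lstm_scores = [np.array([[0.2, 0.7, 0.1],
                              [0.6, 0.3, 0.1]])]  # mock LSTM softmax output
mock_cnn_scores = [np.array([[0.1, 0.8, 0.1],
                             [0.5, 0.2, 0.3]])]   # mock CNN softmax output
mixed = np.sum([mock_lstm_scores, mock_cnn_scores], axis=0)  # shape (1, 2, 3)
preds = np.argmax(mixed, axis=2)                             # label per sample
print(preds)  # [[1 0]]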
def get_cnn_result(model):
    # if not os.path.exists(embedding_model_path):
    #     print("word2vec model is not found")
    if not os.path.exists(train_data_path):
        print("train params is not found")
    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])
    # Read and preprocess the input: fetch the samples to classify from MySQL
    mysql_server = 'localhost'
    name = 'root'
    password = '******'  # placeholder, fill in real credentials
    mysql_db = 'tensor'
    db = pymysql.connect(mysql_server, name, password, mysql_db)
    # Word segmentation
    # test_sample_lists = readdata.get_cleaned_list(test_file_path)  # 1. used when training the model
    test_sample_lists = readdata.get_query_list_cnn(db)  # used for testing
    # Alternative: take pre-segmented input from the lower-level API
    # test_sample_lists = getapi.post_url()
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    # Before the change:
    # test_sample_arrays = np.array(
    #     word2vec.get_embedding_vector(test_sample_lists, embedding_model_path))
    # After the change: the embedding model is passed in instead of reloaded
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, model))
    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length
    sess = tf.InteractiveSession()
    cnn = Cnn_Model.TextCNN(config=testconfig)
    # Restore the trained parameters
    # tf.get_variable_scope().reuse_variables()
    saver = tf.train.Saver()
    saver.restore(sess,
                  "E:/资料/Comments_Classifiation-master/data/cnn/text_model")

    # Test step: returns the predicted labels and softmax scores;
    # sess.run evaluates the requested tensors under the given feed_dict
    def test_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.dropout_keep_prob: 1.0}
        predictions, scores = sess.run([cnn.predictions, cnn.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    # Collect the results, then release the session
    predictions, scores = test_step(test_sample_arrays)
    sess.close()
    return predictions, scores
    # print("(0 -> neg & 1 -> pos) the result is:")
    # print(predictions)
    # print("********************************")
    # print("the scores is:")
    # print(scores)

# get_cnn_result(model)
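# A hedged sketch of what readdata.get_query_list_cnn(db) might look like:
# pull comment texts from MySQL and segment them into token lists. The table
# and column names ('comments', 'content') and the use of jieba are
# illustrative assumptions, not taken from the original project.
import jieba
import pymysql

def get_query_list_cnn_sketch(db):
    cursor = db.cursor()
    cursor.execute("SELECT content FROM comments")  # assumed table/column
    rows = cursor.fetchall()
    cursor.close()
    # one token list per comment, matching what padding_sentences expects
    return [list(jieba.cut(row[0])) for row in rows]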
num_labels = 3
embedding_size = 64
dropout_keep_prob = 0.9
batch_size = 128
num_epochs = 20
max_sentences_length = 25
num_layers = 3
max_grad_norm = 5
l2_rate = 0.0001

# Load the data
all_sample_lists, all_label_arrays, max_sentences_length = readdata.get_all_data_from_file(
    happy_file_path, angry_file_path, unhappy_file_path, force_len=40)
all_sample_arrays = np.array(
    word2vec.get_embedding_vector(all_sample_lists, embedding_model_path))
del all_sample_lists
print("sample.shape = {}".format(all_sample_arrays.shape))
print("label.shape = {}".format(all_label_arrays.shape))
trainconfig = config()
trainconfig.max_sentences_length = max_sentences_length
testconfig = config()
testconfig.max_sentences_length = max_sentences_length
testconfig.dropout_keep_prob = 1.0

# Save the training parameters so inference can rebuild matching inputs
params = {
    "num_labels": trainconfig.num_labels,
    "max_sentences_length": max_sentences_length
}
readdata.save(params, train_data_path)
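# A minimal sketch of the params round-trip, assuming readdata.save and
# readdata.loadDict serialize a plain dict. JSON is used here purely for
# illustration; the project's actual on-disk format may differ.
import json

def save(params, path):
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(params, f)

def loadDict(path):
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

save({"num_labels": 3, "max_sentences_length": 25}, "train_params.json")
print(loadDict("train_params.json")["max_sentences_length"])  # 25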