import os

import numpy as np
import tensorflow as tf

# Project-local modules (assumed importable from the Comments_Classifiation
# repo): readdata, word2vec, Cnn_Model / cnn_model, Lstm_Model / lstm_model,
# and the config / lstmconfig / cnnconfig classes.


def get_cnn_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model not found")

    if not os.path.exists(train_data_path):
        print("train params not found")

    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])

    # Write and process the file
    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, embedding_model_path))
    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length

    sess = tf.InteractiveSession()
    cnn = Cnn_Model.TextCNN(config=testconfig)

    # Load the trained parameters
    saver = tf.train.Saver()
    saver.restore(
        sess, "D:/urun/Comments_Classifiation-master/data/cnn/text_model")

    # Define the test step: returns the corresponding predictions and scores.
    # sess.run evaluates the graph for the given feeds.
    def test_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.dropout_keep_prob: 1.0}
        predictions, scores = sess.run([cnn.predictions, cnn.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    # Get the results
    predictions, scores = test_step(test_sample_arrays)
    return np.array(predictions)
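
# For context, a minimal sketch of what a fixed-length padder such as
# readdata.padding_sentences presumably does; this stand-in (name and
# signature included) is an assumption, not the project's actual helper.
def pad_sentences_sketch(sentences, padding_token='<PADDING>',
                         target_length=None):
    # Truncate or right-pad each token list to a fixed length and
    # return the length that was actually used.
    if target_length is None:
        target_length = max(len(s) for s in sentences)
    padded = [s[:target_length] + [padding_token] * (target_length - len(s))
              for s in sentences]
    return padded, target_length
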
def get_lstm_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model not found")

    if not os.path.exists(train_data_path):
        print("train params not found")

    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])

    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, embedding_model_path))
    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length

    sess = tf.InteractiveSession()
    lstm = Lstm_Model.TextLSTM(config=testconfig)

    saver = tf.train.Saver()
    saver.restore(
        sess, "D:/urun/Comments_Classifiation-master/data/lstm/text_model")

    # Define the test step
    def test_step(x_batch):
        feed_dict = {
            lstm.input_x: x_batch,
            # Disable dropout at inference time.
            lstm.dropout_keep_prob: 1.0
        }
        predictions, scores = sess.run([lstm.predictions, lstm.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    predictions, scores = test_step(test_sample_arrays)
    return np.array(predictions)
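
# The absolute checkpoint paths above are machine-specific. A more portable
# pattern (a sketch, assuming a standard TF1 checkpoint directory layout) is
# to resolve the newest checkpoint at run time:
def restore_latest(sess, checkpoint_dir):
    # tf.train.latest_checkpoint returns the newest checkpoint prefix
    # under checkpoint_dir, or None if there is none.
    ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if ckpt is None:
        raise FileNotFoundError("no checkpoint found in " + checkpoint_dir)
    tf.train.Saver().restore(sess, ckpt)
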
def get_mixed_result():
    if not os.path.exists(embedding_model_path):
        print("word2vec model not found")

    if not os.path.exists(lstm_train_data_path):
        print("lstm train params not found")

    lstm_params = readdata.loadDict(lstm_train_data_path)
    lstm_train_length = int(lstm_params['max_sentences_length'])

    if not os.path.exists(cnn_train_data_path):
        print("cnn train params not found")

    cnn_params = readdata.loadDict(cnn_train_data_path)
    cnn_train_length = int(cnn_params['max_sentences_length'])

    test_sample_lists = readdata.get_cleaned_list(test_file_path)
    lstm_test_sample_lists, lstm_max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=lstm_train_length)
    cnn_test_sample_lists, cnn_max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=cnn_train_length)
    lstm_test_sample_arrays = np.array(
        word2vec.get_embedding_vector(lstm_test_sample_lists,
                                      embedding_model_path))
    cnn_test_sample_arrays = np.array(
        word2vec.get_embedding_vector(cnn_test_sample_lists,
                                      embedding_model_path))
    lstm_config = lstmconfig()
    cnn_config = cnnconfig()
    lstm_config.max_sentences_length = lstm_max_sentences_length
    cnn_config.max_sentences_length = cnn_max_sentences_length

    lstm_graph = tf.Graph()
    cnn_graph = tf.Graph()
    lstm_sess = tf.Session(graph=lstm_graph)
    cnn_sess = tf.Session(graph=cnn_graph)

    with lstm_sess.as_default():
        with lstm_graph.as_default():
            lstm = lstm_model.TextLSTM(config=lstm_config)
            lstm_saver = tf.train.Saver()
            lstm_saver.restore(lstm_sess, "data/lstm/text_model")

            def lstm_test_step(x_batch):
                feed_dict = {
                    lstm.input_x: x_batch,
                    # Disable dropout at inference time.
                    lstm.dropout_keep_prob: 1.0
                }
                # A list fetch makes sess.run return a one-element list,
                # so scores has shape (1, batch, num_labels).
                scores = lstm_sess.run([lstm.softmax_result],
                                       feed_dict=feed_dict)
                return scores

            lstm_scores = lstm_test_step(lstm_test_sample_arrays)

    with cnn_sess.as_default():
        with cnn_graph.as_default():
            cnn = cnn_model.TextCNN(config=cnn_config)
            cnn_saver = tf.train.Saver()
            cnn_saver.restore(cnn_sess, "data/cnn/text_model")

            def cnn_test_step(x_batch):
                feed_dict = {
                    cnn.input_x: x_batch,
                    # Disable dropout at inference time.
                    cnn.dropout_keep_prob: 1.0
                }
                # Shape (1, batch, num_labels), matching lstm_test_step.
                scores = cnn_sess.run([cnn.softmax_result],
                                      feed_dict=feed_dict)
                return scores

            cnn_scores = cnn_test_step(cnn_test_sample_arrays)

    lstm_sess.close()
    cnn_sess.close()
    # Each scores array has shape (1, batch, num_labels); summing them and
    # taking argmax over the label axis yields the ensemble prediction.
    mixed_scores = np.sum([lstm_scores, cnn_scores], axis=0)
    predictions = np.argmax(mixed_scores, axis=2)
    return np.array(predictions)
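
# A toy NumPy example of this score-level ensemble (the numbers are made up):
# summing the two softmax outputs before the argmax can overturn a single
# model's vote.
_lstm_scores = np.array([[[0.6, 0.3, 0.1], [0.2, 0.5, 0.3]]])
_cnn_scores = np.array([[[0.1, 0.7, 0.2], [0.3, 0.4, 0.3]]])
_mixed = np.sum([_lstm_scores, _cnn_scores], axis=0)  # shape (1, 2, 3)
print(np.argmax(_mixed, axis=2))  # [[1 1]] -- the CNN flips sample 0
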
def get_cnn_result(model):
    # if not os.path.exists(embedding_model_path):
    #     print("word2vec model is not found")

    if not os.path.exists(train_data_path):
        print("train params not found")

    params = readdata.loadDict(train_data_path)
    train_length = int(params['max_sentences_length'])

    # Write and process the file
    # mysql_server='localhost'
    # name='root'
    # password='******'
    # mysql_db='tensor'
    # db=pymysql.connect(mysql_server,name,password,mysql_db)

    # Tokenization
    # test_sample_lists = readdata.get_cleaned_list(test_file_path)  # 1. used when training the model
    test_sample_lists = readdata.get_query_list_cnn(db)  # used for testing; db comes from the commented-out pymysql block above
    # Hook in the lower-level tokenization instead:
    # test_sample_lists=getapi.post_url()
    test_sample_lists, max_sentences_length = readdata.padding_sentences(
        test_sample_lists,
        padding_token='<PADDING>',
        padding_sentence_length=train_length)
    # Previous approach:
    # test_sample_arrays=np.array(word2vec.get_embedding_vector(test_sample_lists,embedding_model_path))
    # Current approach:
    test_sample_arrays = np.array(
        word2vec.get_embedding_vector(test_sample_lists, model))

    testconfig = config()
    testconfig.max_sentences_length = max_sentences_length

    sess = tf.InteractiveSession()
    cnn = Cnn_Model.TextCNN(config=testconfig)

    # Load the trained parameters
    # tf.get_variable_scope().reuse_variables()
    saver = tf.train.Saver()
    saver.restore(sess,
                  "E:/资料/Comments_Classifiation-master/data/cnn/text_model")

    # Define the test step: returns the corresponding predictions and scores.
    # sess.run evaluates the graph for the given feeds.

    def test_step(x_batch):
        feed_dict = {cnn.input_x: x_batch, cnn.dropout_keep_prob: 1.0}
        predictions, scores = sess.run([cnn.predictions, cnn.softmax_result],
                                       feed_dict=feed_dict)
        return (predictions, scores)

    # Get the results, then release the session.
    predictions, scores = test_step(test_sample_arrays)
    sess.close()

    return predictions, scores
    # print("(0->neg & 1->pos)the result is:")
    # print(predictions)
    # print("********************************")
    # print("the scores is:")
    # print(scores)


# get_cnn_result(model)
class config:
    num_labels = 3
    embedding_size = 64
    dropout_keep_prob = 0.9
    batch_size = 128
    num_epochs = 20
    max_sentences_length = 25
    num_layers = 3
    max_grad_norm = 5
    l2_rate = 0.0001


# Load the data
all_sample_lists, all_label_arrays, max_sentences_length = readdata.get_all_data_from_file(
    happy_file_path, angry_file_path, unhappy_file_path, force_len=40)
all_sample_arrays = np.array(
    word2vec.get_embedding_vector(all_sample_lists, embedding_model_path))
del all_sample_lists
print("sample.shape = {}".format(all_sample_arrays.shape))
print("label.shape = {}".format(all_label_arrays.shape))
trainconfig = config()
trainconfig.max_sentences_length = max_sentences_length
testconfig = config()
testconfig.max_sentences_length = max_sentences_length
testconfig.dropout_keep_prob = 1.0

# Save the training parameters
params = {
    "num_labels": trainconfig.num_labels,
    "max_sentences_length": max_sentences_length
}
readdata.save(params, train_data_path)
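
# readdata.save / readdata.loadDict are only used here as a dict round-trip.
# A minimal sketch of such helpers, assuming plain JSON on disk (the
# project's actual serialization may differ):
import json

def save_sketch(params, path):
    # Persist the flat params dict as JSON.
    with open(path, 'w') as f:
        json.dump(params, f)

def load_dict_sketch(path):
    # Load the params dict back; callers cast values as needed,
    # e.g. int(params['max_sentences_length']).
    with open(path) as f:
        return json.load(f)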