def predict(input_path, output_path):
    # Keep the raw texts so they can be written out next to the predicted labels.
    df_i = pd.read_excel(input_path)
    corpus_i = df_i.iloc[:, [1]]
    corpus_i = np.array(corpus_i).tolist()

    corpus = reader.preprocess(reader.read_excel(input_path, text_column=1),
                               seq_lenth=FLAGS.seq_lenth,
                               seq_num=1,
                               overlap_lenth=0,
                               input_label=False,
                               output_index=True)
    # vocab, word2id = reader.read_glossary()

    test_inputs = []
    test_lenths = []
    test_num = 0
    for item in corpus:
        test_inputs.append(item[0])
        test_lenths.append(item[1])
        test_num += 1

    with tf.Graph().as_default(), tf.Session() as sess:
        model = em_sent(seq_size=FLAGS.seq_lenth,
                        glossary_size=FLAGS.glossary_size,
                        embedding_size=FLAGS.embedding_size,
                        hidden_size=FLAGS.hidden_size,
                        attn_lenth=FLAGS.attn_lenth,
                        is_training=False)
        model.buildTrainGraph()

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)
        if reader.restore_from_checkpoint(sess, saver, FLAGS.ckpt_dir):
            total_expection = []
            print(test_num)
            # Run the restored model over the test set in batches.
            for piece_inputs, piece_lenths in get_test_batch(
                    test_inputs, test_lenths, None, test_num, input_label=False):
                test_feed_dict = {
                    model.inputs: piece_inputs,
                    model.lenths: piece_lenths,
                    model.lenths_weight: padded_ones_list_like(piece_lenths, FLAGS.seq_lenth),
                }
                expection = sess.run(model.expection, feed_dict=test_feed_dict)
                total_expection.extend(expection)

            # Pair each original text (looked up via its index) with the predicted label.
            zipped = []
            for index in range(test_num):
                zipped.append([corpus_i[corpus[index][2]],
                               'T' if total_expection[index][0] == 0 else 'F'])

            df_o = pd.DataFrame(zipped)
            writer = pd.ExcelWriter(output_path)
            df_o.to_excel(writer, 'Sheet1')
            writer.save()
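# The two helpers used above, `padded_ones_list_like` and `get_test_batch`, are
# defined elsewhere in the project. The sketch below (hypothetical names,
# assumed behaviour) only illustrates the contract the feed dicts rely on:
# a 0/1 mask padded out to seq_lenth, and a generator that yields the test set
# in fixed-size (inputs, lenths) slices when no labels are supplied.
def _padded_ones_mask_sketch(lenths, seq_lenth):
    # One 1 per real token, zero-padded to the fixed sequence length.
    return [[1] * l + [0] * (seq_lenth - l) for l in lenths]


def _test_batch_sketch(inputs, lenths, num, batch_size=128):
    # Yield (inputs, lenths) slices covering all `num` examples.
    for start in range(0, num, batch_size):
        yield inputs[start:start + batch_size], lenths[start:start + batch_size]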
def test_onesent(text):
    corpus = reader.preprocess([[text]],
                               seq_lenth=FLAGS.seq_lenth,
                               seq_num=1,
                               overlap_lenth=0,
                               input_label=False,
                               output_index=False)
    vocab, word2id = reader.read_glossary()
    print(corpus)

    test_inputs = []
    test_lenths = []
    test_num = 0
    for item in corpus:
        test_inputs.append(item[0])
        test_lenths.append(item[1])
        test_num += 1

    with tf.Graph().as_default(), tf.Session() as sess:
        model = em_sent(seq_size=FLAGS.seq_lenth,
                        glossary_size=FLAGS.glossary_size,
                        embedding_size=FLAGS.embedding_size,
                        hidden_size=FLAGS.hidden_size,
                        attn_lenth=FLAGS.attn_lenth,
                        is_training=False)
        model.buildTrainGraph()

        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)
        if reader.restore_from_checkpoint(sess, saver, FLAGS.ckpt_dir):
            test_feed_dict = {
                model.inputs: test_inputs,
                model.lenths: test_lenths,
                model.lenths_weight: padded_ones_list_like(test_lenths, FLAGS.seq_lenth),
            }
            expection, alpha, logits = sess.run(
                [model.expection, model.alpha, model.logits],
                feed_dict=test_feed_dict)

            print([vocab[i] for i in test_inputs[0]])
            # Print each token with its attention weight and logit.
            for i in range(len(test_inputs[0])):
                print(vocab[test_inputs[0][i]], alpha[0][i], logits[0][i])
            # print([vocab[word] for word in test_inputs])
            if expection[0][0] == 1:
                print('负面')    # negative
            else:
                print('正面')    # positive
            return expection[0]
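# A small, self-contained illustration (not part of the original project) of
# how the per-token attention weights printed by test_onesent can be
# summarised: pair each token with its alpha weight and keep the top-k.
def top_attention_tokens(vocab, token_ids, alpha_row, k=5):
    # vocab: id -> word table; token_ids: one padded sentence; alpha_row: its attention weights.
    pairs = [(vocab[tid], float(w)) for tid, w in zip(token_ids, alpha_row)]
    return sorted(pairs, key=lambda p: p[1], reverse=True)[:k]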
def test_for_lime(text):
    # `text` is a list of space-separated strings, as produced by LIME's perturbation step.
    corpus = reader.preprocess([[i] for i in text],
                               seq_lenth=FLAGS.seq_lenth,
                               seq_num=1,
                               overlap_lenth=0,
                               input_label=False,
                               output_index=False,
                               split_func=lambda x: x.split(' '),
                               de_duplicated=False)
    # vocab, word2id = reader.read_glossary()

    test_inputs = []
    test_lenths = []
    test_num = 0
    for item in corpus:
        test_inputs.append(item[0])
        test_lenths.append(item[1])
        test_num += 1

    # Relies on the module-level SESS and model so the graph is built only once
    # across the many perturbed samples LIME generates.
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)
    if reader.restore_from_checkpoint(SESS, saver, FLAGS.ckpt_dir):
        total_expection = []
        for piece_inputs, piece_lenths in get_test_batch(
                test_inputs, test_lenths, None, test_num, input_label=False):
            test_feed_dict = {
                model.inputs: piece_inputs,
                model.lenths: piece_lenths,
                model.lenths_weight: padded_ones_list_like(piece_lenths, FLAGS.seq_lenth),
            }
            expection = SESS.run(model.raw_expection, feed_dict=test_feed_dict)
            # Two-column probabilities per sample: [p, 1 - p].
            total_expection.extend([[round(i[0], 4), round(1 - i[0], 4)]
                                    for i in expection])
        return np.array(total_expection)
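# `test_for_lime` follows the classifier_fn contract of LIME's text explainer:
# a list of space-separated strings in, an (n, 2) probability array out, so it
# can be handed to lime.lime_text directly. A minimal sketch of the wiring,
# assuming the `lime` package is installed and the module-level SESS, model and
# checkpoint are already in place; the class-name order is an assumption.
def explain_with_lime(sentence, num_features=6):
    from lime.lime_text import LimeTextExplainer
    explainer = LimeTextExplainer(class_names=['负面', '非负面'],
                                  split_expression=' ')
    # LIME perturbs `sentence`, calls test_for_lime on the perturbed batch,
    # and fits a local linear model over the returned probabilities.
    return explainer.explain_instance(sentence, test_for_lime,
                                      num_features=num_features)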
            print('负面')      # negative
            temp.append('负面')
        elif (expection[index][0] == 0):
            print('非负面')    # non-negative
            temp.append('非负面')
        sp.append(temp)
    return sp


if __name__ == '__main__':
    text = ['康宝莱】河南郑州', '花季少女死于康宝莱']
    corpus = reader.preprocess(reader.read_excel('../data/corpus/check/yxcd.xlsx',
                                                 text_column=1,
                                                 label_column=0),
                               seq_lenth=FLAGS.seq_lenth,
                               seq_num=1,
                               overlap_lenth=0,
                               input_label=True,
                               output_index=False,
                               de_duplicated=True)
    # with open('../data/corpus/latest/test.pickle', 'rb') as fp:
    #     corpus = pickle.load(fp)
    test(corpus)
    # predict(input_path='../data/corpus/check/klb.xlsx', output_path='out/x.xlsx')
    # test_onesent('【康宝莱】河南郑州花季少女死于康宝莱传销相关部门难逃其咎: 我叫张云成,男,汉族,现年47岁,家住河南省淮阳县冯塘乡蔡李庄村。2017年6月5日,我的儿子张旭(17岁)从郑州回到家里,向家里要钱,...文字版>> http://t.cn/RonRbWK (新浪长微博>> http://t.cn/zOXAaic)')
    # test_onesent('走进康宝莱之前,我听到了台上的分享嘉宾说了这句话:“也许做康宝莱这件事不是你的梦想,但做好康宝莱能实现你所有的梦想!” 我信了。 果然这是真的。 同时,在这个过程中,帮助更多人获得好身材、健康、财富、和精彩人生,也一并成为了我的梦想!和我其他的梦想一起实现了!——肖珂宇 网页链接')
    # vocab, word2id = reader.read_glossary()
    # sp = get_word_sentiment_polarity(vocab)
    #
    # import pandas as pd
    #