def lda_infer():
    """Return the LDA topic distribution of the submitted document.

    Reads the ``category`` and ``type`` fields from the request form,
    obtains the document text through ``input_doc_str``, tokenizes it and
    runs LDA inference with the model selected by ``category``.

    Returns:
        Whatever ``json_format`` produces for the inferred topic
        distribution.
    """
    form = request.form
    category, in_type = form['category'], form['type']
    document = input_doc_str(in_type)

    # The model directory depends on the category; the LDA config does not.
    engine = InferenceEngineWrapper(get_model_dir(category), get_lda_conf())
    tokens = engine.tokenize(document)
    return json_format(engine.lda_infer(tokens))
def doc_topic_word_lda():
    """Return, for each inferred topic, the words nearest to that topic.

    Reads the ``category`` and ``type`` fields from the request form,
    obtains the document text through ``input_doc_str``, runs LDA inference
    on it, and then queries the topical word embeddings for the
    ``get_count()`` nearest words around every inferred topic.

    Returns:
        A JSON string encoding a mapping from each value of the inferred
        topic distribution to a mapping built from
        ``nearest_words_around_topic`` for the corresponding topic.
    """
    category = request.form['category']
    in_type = request.form['type']
    f_text = input_doc_str(in_type)

    model_dir = get_model_dir(category)
    inference_engine_wrapper = InferenceEngineWrapper(model_dir,
                                                      get_lda_conf())
    seg_list = inference_engine_wrapper.tokenize(f_text)
    topic_dist = dict(inference_engine_wrapper.lda_infer(seg_list))

    result = {}
    if topic_dist:
        # Build the embeddings wrapper and read the word count once per
        # request instead of once per topic: neither depends on the topic.
        # NOTE(review): assumes get_count() is constant within a request and
        # that the wrapper can be reused across queries -- confirm.
        twe_wrapper = TopicalWordEmbeddingsWrapper(model_dir,
                                                   get_emb_file(category))
        word_count = get_count()
        for topic_id, weight in topic_dist.items():
            # NOTE(review): the response is keyed by the distribution value
            # (presumably the topic probability), so two topics with an equal
            # value overwrite each other. Kept as-is to preserve the response
            # shape; verify whether keying by topic id was intended.
            result[weight] = dict(
                twe_wrapper.nearest_words_around_topic(int(topic_id),
                                                       word_count))
    return json.dumps(result)
result.append(ent) return result if __name__ == '__main__': path = '/media/iiip/数据/duanduan/data/validation.csv' documents = read_whole_file(path) if len(sys.argv) < 3: sys.stderr.write("Usage:python {} {} {}\n".format( sys.argv[0], "model_dir", "conf_file")) exit(-1) # 获取参数 model_dir = sys.argv[1] conf_file = sys.argv[2] # 创建InferenceEngineWrapper对象 inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file) topic_result = {} for key in documents: print key seg_list = inference_engine_wrapper.tokenize(documents[key]) # 进行推断 topic_dist = inference_engine_wrapper.lda_infer(seg_list) topic_result[key] = cal_topic(topic_dist) file = open(path.replace(".csv", "_topic.csv"), 'w') writer = csv.writer(file) for each in topic_result: writer.writerow([each, topic_result[each][0], topic_result[each][1]]) file.close() # return topic_result
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Author: [email protected]

import sys

from familia_wrapper import InferenceEngineWrapper

# Python 2 compatibility: make input() return the raw line there as well.
if sys.version_info < (3, 0):
    input = raw_input

if __name__ == '__main__':
    # Interactive demo: read a document from the prompt, tokenize it, run
    # LDA inference and print the resulting topic distribution.
    if len(sys.argv) < 3:
        sys.stderr.write("Usage:python {} {} {}\n".format(
            sys.argv[0], "model_dir", "conf_file"))
        sys.exit(-1)

    # Fetch the arguments.
    model_dir = sys.argv[1]
    conf_file = sys.argv[2]

    # Create the InferenceEngineWrapper object.
    inference_engine_wrapper = InferenceEngineWrapper(model_dir, conf_file)

    while True:
        try:
            input_text = input("Enter Document: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session cleanly instead
            # of terminating with a traceback.
            sys.stdout.write("\n")
            break
        # Tokenize.
        seg_list = inference_engine_wrapper.tokenize(input_text)
        # Run inference.
        topic_dist = inference_engine_wrapper.lda_infer(seg_list)
        # Print the result.
        print("Document Topic Distribution:")
        print(topic_dist)