Example #1
0
from entity_verb.nlp import NLP
import json

# Load the previously extracted entity list from JSON.
# FIX: use a context manager instead of manual open()/close() so the
# handle is released even if json.loads raises.
with open('entity_verb_result\\' + "all_entity.json", 'r', encoding='utf-8') as f:
    all_entity = json.loads(f.read())['all_entity']

nlp = NLP()

# Group entities by their LTP part-of-speech tag: {postag: [words...]}.
postag_dict = dict()
for word in all_entity:
    postage = nlp.get_postag(word)
    # setdefault replaces the manual "if key not in dict" membership test.
    postag_dict.setdefault(postage, []).append(word)
print(postag_dict)

# Write one JSON object per POS tag, keyed "<tag>_<count>", one per line.
# FIX: open the output file once (the original re-opened it in append mode
# on every loop iteration) and pass encoding='utf-8' — without it, writing
# ensure_ascii=False Chinese text crashes with UnicodeEncodeError on
# platforms whose default encoding is not UTF-8 (e.g. Windows cp936/cp1252).
with open("entity_verb_result\\entity_classification_LTP.json", 'a', encoding='utf-8') as f_out:
    for tag, words in postag_dict.items():
        record = {tag + '_' + str(len(words)): list(words)}
        f_out.write(json.dumps(record, ensure_ascii=False))
        f_out.write("\n")
# print(all_entity)
Example #2
0
if __name__ == "__main__":
    # 读取文件
    entity_verb_new = entity_verb_new()
    """
    加载LTP的分词器和词性标注器
    """
    default_model_dir = 'D:\python-file\knowledge_extraction-master-tyz\\ltp_data_v3.4.0\\'  # LTP模型文件目录
    segmentor = Segmentor()
    user_dict = "source\\user.txt"
    segmentor_flag = segmentor.load_with_lexicon(
        os.path.join(default_model_dir, 'cws.model'), user_dict)

    postagger = Postagger()
    postag_flag = postagger.load(os.path.join(default_model_dir, 'pos.model'))

    nlp = NLP()
    thu1 = thulac.thulac()  # 默认模式
    path = r"D:\python-file\北京市旅游知识图谱\\verb-entity\\bj_travel"
    file_list = os.listdir(path)
    f = open('entity_verb_result\\' + "all_entity.json", 'r', encoding='utf-8')
    file = f.read()
    all_entity = json.loads(file)['all_entity']

    print(all_entity)
    f.close()
    for file_name in file_list:
        print(file_name)
        f = open('D:\python-file\北京市旅游知识图谱\\verb-entity\\bj_travel\\' +
                 file_name,
                 'r',
                 encoding='utf-8')