def entityRecognize(word_list, question):
    """Return the candidate entity mentions found in a segmented question.

    Two phases:

    1. Every contiguous run of words (concatenated without a separator) that
       is longer than one character is looked up, together with its aliases
       from ``mention2entity_dic``, in the graph via ``ccksNeo``.
    2. A mention that is a proper substring of another collected mention is
       kept only if some neighbour of one of its graph entities is similar
       (score > 0.2) to the question with the mention's characters removed.

    NOTE(review): the source had lost its line breaks and indentation; the
    nesting below is reconstructed from the syntax - confirm against the
    original layout.

    Args:
        word_list: The question split into words, in order.
        question: The raw question string.

    Returns:
        A list of mention strings, in collection order; duplicates of a kept
        mention are preserved, every occurrence of a rejected one is removed.
    """
    entity_list = _collect_candidate_mentions(word_list)

    # Decide on a snapshot and filter afterwards. Removing items from the
    # list while iterating over it makes the iterator skip the element that
    # slides into the freed slot.
    candidates = list(entity_list)
    # Plain lists rather than sets: some mentions are attribute values
    # returned by the graph, and nothing here guarantees they are hashable.
    checked = []
    rejected = []
    for entity1 in candidates:
        if entity1 in checked:
            continue
        checked.append(entity1)

        # Only a short mention contained in a longer one needs checking.
        # The longest containing mention is never rejected itself, so testing
        # against the snapshot equals testing against the surviving list.
        contained = any(
            entity1 != entity2 and entity1 in entity2
            for entity2 in candidates
        )
        if not contained:
            continue

        # Remainder of the question once every character of the mention has
        # been stripped out (character-wise, not as a whole substring).
        temp = question
        for char in entity1:
            temp = temp.replace(char, "")

        if not _has_supporting_relation(entity1, temp):
            rejected.append(entity1)

    entity_list = [mention for mention in candidates if mention not in rejected]
    print("entity_list", entity_list)
    return entity_list


def _collect_candidate_mentions(word_list):
    """Look up every multi-character word span and its aliases in the graph."""
    name_suffixes = ('名', '称')
    foreign_name_keys = ('英文名', '英文名称', '外文名', '外文名称')

    mentions = []
    # Iterate by position: list.index(word) returns the FIRST occurrence, so
    # a repeated word would restart the span at the wrong place.
    for start, _ in enumerate(word_list):
        entity = ""
        for temp_entity in word_list[start:]:
            entity = entity + temp_entity
            if len(entity) <= 1:
                continue

            all_entity = [entity]
            if entity in mention2entity_dic:  # the mention has known aliases
                all_entity.extend(mention2entity_dic[entity])

            for en in all_entity:
                same_name_entity_list = ccksNeo.get_entity_list_by_name(en)
                extra_name = ccksNeo.get_entity_info_by_name(en)
                # presumably each item is an (attribute, value) pair - TODO confirm
                for name in extra_name:
                    is_name_attribute = name[0][-1] in name_suffixes
                    if (
                        is_name_attribute
                        and len(name[1]) > 1
                        and name[0] not in foreign_name_keys
                    ):
                        mentions.append(name[1])
                if len(same_name_entity_list) >= 1:
                    mentions.append(en)
    return mentions


def _has_supporting_relation(mention, remainder, threshold=0.2):
    """Tell whether any neighbour name of the mention resembles the remainder."""
    # The remainder is the same for every relation, so segment it once.
    remainder_tokens = list(jieba.cut(remainder))
    for entitydict in ccksNeo.get_entity_list_by_name(mention):
        relations = ccksNeo.get_related_entities_by_id(entitydict['id'])
        for relation in relations:
            score = serviceWord2vec.get_similarity(
                remainder_tokens, list(jieba.cut(relation['name']))
            )
            if score > threshold:
                return True
    return False
# Pull the answers out of the current record and append them to `sentence`.
# The text after the first "|||||" is split on "||" into answer items, and
# the third "|"-separated field of each item is taken as the answer.
# NOTE(review): `line`, `sentence`, `p` and `ccksNeo` are defined outside this
# fragment, and the source had lost its indentation - confirm the nesting and
# the enclosing scope against the original file.
yitiaodaan = []
index2 = line.find('|||||')
if index2 == -1:
    # No answer section on this record. Reset `yitiao` explicitly: the old
    # bare `except` left it unbound or, worse, holding the previous record's
    # value, whose answers were then written out for this record.
    print(1)
    yitiao = ""
else:
    yitiao = line.split("|||||")[1]
    yitiaodaan = yitiao.split("||")

an21 = []
for i in yitiaodaan:
    if i == ' ':
        continue
    fields = i.split("|")
    if len(fields) > 2:
        an21.append(fields[2])
    else:
        # Malformed item with fewer than three fields: report and skip.
        print(1)

# De-duplicate in first-seen order so the output does not depend on the
# per-process string hash order, as list(set(...)) did.
comp = list(dict.fromkeys(an21))
for mm in comp:
    info = ccksNeo.get_entity_info_by_name(mm)
    if info != []:
        # The lookup returned something: written in angle brackets.
        sentence = sentence + '\t' + '<' + mm + '>'
    else:
        # Empty lookup result: written in curly quotes instead.
        sentence = sentence + '\t' + '“' + mm + '”'
print(comp)
p.writelines(sentence + '\n')