def depend_parse():
    """Demo: run dependency parsing and triple extraction on one sentence.

    Registers two entities (with their mentions and types) on a fresh
    ``HarvestText`` instance, prints every item returned by
    ``dependency_parse`` on its own line, then prints the result of
    ``triple_extraction`` for the same sentence.
    """
    sentence = "上港的武磊武球王是中国最好的前锋。"
    mentions_by_entity = {'武磊': ['武磊', '武球王'], "上海上港": ["上港"]}
    type_by_entity = {'武磊': '球员', "上海上港": "球队"}

    harvester = HarvestText()
    harvester.add_entities(mentions_by_entity, type_by_entity)

    arcs = harvester.dependency_parse(sentence)
    for arc in arcs:
        print(arc)

    triples = harvester.triple_extraction(sentence)
    print(triples)
def test_depend_parse():
    """Check the printed parse output against a stored expectation file.

    Everything printed while parsing is captured into ``<name>_current`` and
    compared with the contents of ``<name>_expected``, where ``<name>`` is
    whatever ``get_current_function_name()`` returns when called from here.

    Raises:
        AssertionError: if the captured output differs from the expectation.
        OSError: if the expectation file cannot be read or the output file
            cannot be written.
    """
    import contextlib

    # Called directly in this function's body (not in a helper or nested
    # scope) because it presumably inspects the calling frame to get the name.
    name = get_current_function_name()
    current_path = name + "_current"

    with open(name + "_expected") as expected_file:
        expected = expected_file.read()

    # redirect_stdout restores sys.stdout on exit, even when the parse raises,
    # so later tests never print into a closed file. All HarvestText work
    # stays inside the redirect so anything it prints is captured as before.
    with open(current_path, "w") as current_file, \
            contextlib.redirect_stdout(current_file):
        ht0 = HarvestText()
        para = "上港的武磊武球王是中国最好的前锋。"
        entity_mention_dict = {'武磊': ['武磊', '武球王'], "上海上港": ["上港"]}
        entity_type_dict = {'武磊': '球员', "上海上港": "球队"}
        ht0.add_entities(entity_mention_dict, entity_type_dict)
        for arc in ht0.dependency_parse(para):
            print(arc)
        print(ht0.triple_extraction(para))

    with open(current_path) as current_file:
        actual = current_file.read()

    assert actual == expected
def get_seq(text):
    """Extract triples from ``text`` and return each one joined into a string.

    The text is first passed through ``tkitText.Text().clear``. Triples are
    then extracted with ``HarvestText.triple_extraction`` using
    ``standard_name=False``, ``stopwords=None`` and ``expand="all"``.

    Args:
        text: The raw text to extract from.

    Returns:
        A list with one string per extracted item, each built by
        concatenating the item's elements with no separator. Items equal to
        the empty string are skipped.
    """
    extractor = HarvestText()
    cleaned = tkitText.Text().clear(text)
    triples = extractor.triple_extraction(
        sent=cleaned,
        standard_name=False,
        stopwords=None,
        expand="all",
    )
    return ["".join(triple) for triple in triples if triple != ""]
from harvesttext import HarvestText

# Minimal demo: extract triples from a placeholder sentence and print them.
ht0 = HarvestText()
s = "nnn"

# expand="exclude_entity" is passed straight through to triple_extraction.
triples = ht0.triple_extraction(sent=s, expand="exclude_entity")
print("ht知识:", triples)