def parse(words, postags):
    parser = Parser()  # initialize the parser instance
    parser.load('../ltp_data/parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    # print('----------------')
    # print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    # print('----------------')
    parser.release()  # release the model
    return arcs

def get_dependency(self, words):
    # Dependency parsing
    postags = self.get_postags(words)
    parser = Parser()  # initialize the parser instance
    parser.load(self.par_model_path)  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    parser.release()  # release the model
    return arcs

def __init__(
        self,
        word_pattern_file="H:\\new_github_workspace\\Syptom_Knowledge_Graph_309\\test_result.csv"):
    self.word_pattern_file = word_pattern_file
    self.segmentor = Segmentor()
    self.postagger = Postagger()
    self.parser = Parser()
    self.recognizer = NamedEntityRecognizer()

def parse_arcs(words, postags):
    """LTP dependency parsing."""
    parser_model_path = os.path.join(LTP_TOP_DIR, 'parser.model')
    parser = Parser()
    parser.load(parser_model_path)
    arcs = parser.parse(words, postags)
    # Copy the results into plain tuples before releasing the model,
    # since the arcs are backed by the parser's native memory.
    result = [(arc.head, arc.relation) for arc in arcs]
    parser.release()
    return result

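A minimal way to exercise parse_arcs, assuming LTP_TOP_DIR also holds the standard cws.model and pos.model files; the sample sentence and the surrounding calls are illustrative, not from the original code:

# Hedged usage sketch for parse_arcs(); the sentence is illustrative only.
from pyltp import Segmentor, Postagger

segmentor = Segmentor()
segmentor.load(os.path.join(LTP_TOP_DIR, 'cws.model'))
postagger = Postagger()
postagger.load(os.path.join(LTP_TOP_DIR, 'pos.model'))

words = list(segmentor.segment('艾滋病是如何传染的'))   # segment a raw sentence
postags = list(postagger.postag(words))                # tag the segmented words
print(parse_arcs(words, postags))                      # list of (head, relation) tuples

segmentor.release()
postagger.release()
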
def __init__(self):
    CUR_DIR = os.getcwd()  # '/'.join(os.path.abspath(__file__).split('/')[:-1])
    LTP_DIR = os.path.join(CUR_DIR, "ltp_data")
    self.postagger = Postagger()
    self.postagger.load(os.path.join(LTP_DIR, "pos.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(LTP_DIR, "parser.model"))
    print(os.path.join(LTP_DIR, "pos.model"))
    print(os.path.join(LTP_DIR, "parser.model"))

def parse(words, postags):
    parser = Parser()  # initialize the parser instance
    ltp_model_loader.load(parser)
    # parser.load('C:\\Users\\72770\\Documents\\Chatbot\\ltp-data-v3.3.1\\ltp_data\\parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    # print(len(arcs))
    parser.release()  # release the model
    return arcs

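ltp_model_loader is not defined in this snippet. A minimal sketch of what such a helper might look like, assuming it maps each pyltp model object to its model file; the module body, the LTP_DATA_DIR constant, and the file layout are all assumptions:

# Hypothetical ltp_model_loader module; everything here is an assumption.
import os
from pyltp import Parser, Postagger, Segmentor

LTP_DATA_DIR = 'ltp_data'  # assumed model directory

_MODEL_FILES = {Segmentor: 'cws.model', Postagger: 'pos.model', Parser: 'parser.model'}

def load(model):
    """Load the .model file matching the type of a pyltp model object."""
    for model_type, filename in _MODEL_FILES.items():
        if isinstance(model, model_type):
            model.load(os.path.join(LTP_DATA_DIR, filename))
            return
    raise TypeError('unsupported model type: %r' % type(model))
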
def __init__(self):
    self.postagger = Postagger()
    self.parser = Parser()
    self.parser.load(par_model_path)
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(ner_model_path)
    self.labeller = SementicRoleLabeller()
    self.labeller.load(srl_model_path)
    self.postagger.load_with_lexicon(pos_model_path, '/home/wangwei/conf/posttags.txt')

def get_arcs_by_pyltp(self, words_list, postags_list):
    # Dependency parser model path; the model file is named 'parser.model'.
    par_model_path = os.path.join(self.ltp_dir_path, "parser.model")
    parser = Parser()
    parser.load(par_model_path)
    arcs = parser.parse(words_list, postags_list)
    # Copy the arcs into a Python list before releasing the model.
    arcs_list = list(arcs)
    parser.release()
    return arcs_list

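Even when taken before release, list(arcs) still holds Arc proxy objects backed by the parser's native memory. A slightly safer variant (a sketch, not from the original code) copies plain (head, relation) tuples out first:

def get_arc_tuples_by_pyltp(self, words_list, postags_list):
    # Hypothetical variant: nothing returned here references native
    # memory once parser.release() has run.
    par_model_path = os.path.join(self.ltp_dir_path, "parser.model")
    parser = Parser()
    parser.load(par_model_path)
    arcs = parser.parse(words_list, postags_list)
    arc_tuples = [(arc.head, arc.relation) for arc in arcs]
    parser.release()
    return arc_tuples
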
def __init__(self):
    self.segmentor = Segmentor()
    self.segmentor.load("model/cws.model")
    # self.segmentor.load_with_lexicon("model/cws.model", 'dict/segdict.txt')  # load the model; the second argument is the path to your external lexicon file
    self.postagger = Postagger()  # initialize the instance
    self.postagger.load('model/pos.model')  # load the model
    self.parser = Parser()  # initialize the instance
    self.parser.load('model/parser.model')  # load the model
    self.recognizer = NamedEntityRecognizer()  # initialize the instance
    self.recognizer.load('model/ner.model')

def __init__(self):
    LTP_DIR = "ltp_data_v3.4.0"
    self.segmentor = Segmentor()
    self.segmentor.load(os.path.join(LTP_DIR, "cws.model"))
    self.postagger = Postagger()
    self.postagger.load(os.path.join(LTP_DIR, "pos.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(LTP_DIR, "parser.model"))

def ltp_parser_data_test():
    from pyltp import Segmentor, Postagger, Parser

    LTP_DATA_DIR = r'D:\BaiduNetdiskDownload\ltp_data_v3.4.0'  # path to the LTP model directory

    # Word segmentation model; the file is named `cws.model`.
    cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')
    segmentor = Segmentor()  # initialize the instance
    segmentor.load(cws_model_path)  # load the model

    # POS tagging model; the file is named `pos.model`.
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
    postagger = Postagger()  # initialize the instance
    postagger.load(pos_model_path)  # load the model

    # Dependency parsing model; the file is named `parser.model`.
    par_model_path = os.path.join(LTP_DATA_DIR, 'parser.model')
    parser = Parser()  # initialize the instance
    parser.load(par_model_path)  # load the model

    sents = [
        'API的全称是什么?', '中国是世界上老年人口最多的国家吗', '艾滋病是如何传染的',
        '1999年NBA总冠军是哪支球队', 'Flash是哪个公司的产品'
    ]
    for sent in sents:
        print(sent)
        words = segmentor.segment(sent)
        print(list(words))
        postags = postagger.postag(words)
        print(list(postags))
        arcs = parser.parse(words, postags)  # dependency parsing
        print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

        # Collect the root word (head == 0) and its direct dependents,
        # skipping punctuation (relation 'WP').
        res = []
        for i in range(len(arcs)):
            if arcs[i].head == 0:
                for j in range(len(arcs)):
                    # if (arcs[j].head == i + 1 or arcs[j].head == 0 or arcs[j].relation == 'ADV') and arcs[j].relation != 'WP':
                    if (arcs[j].head == i + 1 or arcs[j].head == 0) and arcs[j].relation != 'WP':
                        res.append(j)
        print(res)
        # Pull in attributive ('ATT') modifiers of the words collected so far;
        # appending while iterating makes this transitive.
        for j in res:
            for i in range(len(arcs)):
                if arcs[i].head == j + 1 and arcs[i].relation == 'ATT':
                    res.append(i)
        res = list(set(res))
        res.sort()
        print(res)
        print(' '.join([words[i] for i in res]) + '\n')

    segmentor.release()  # release the model
    postagger.release()  # release the model
    parser.release()  # release the model

def __init__(self):
    self.nr_table_name = ''
    # POS tagging
    self.postagger = Postagger()
    self.postagger.load('data/ltp_data/pos.model')
    # Dependency parsing
    self.parser = Parser()
    self.parser.load('data/ltp_data/parser.model')
    self.faker_speakers = open('data/model_data/fake_speakers').read().strip().split(',')

def name_recognize_one():
    import sys, os
    import pyltp
    from pyltp import SentenceSplitter, Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller

    paragraph = '叙利亚东古塔地区。7日发生疑似化学武器袭击事件,导致70余人丧生。报道一出,叙利亚反对派、美国、英国、法国等纷纷指责叙政府军使用化学武器袭击无辜平民。但叙利亚坚决否认,并指责西方和叙反对派造谣,目的是保护被围困的恐怖分子。俄外交部则认为,该谣言旨在袒护恐怖分子,并为外部势力发动打击寻找借口。'

    # Sentence splitting
    sentence = SentenceSplitter.split(paragraph)[1]
    print('split {}'.format(sentence))
    # for i in sentence:
    #     print(i)
    # print()

    segmentor = Segmentor()
    segmentor.load(sg_model_path)
    words = segmentor.segment(sentence)
    print('|'.join(words))

    postagger = Postagger()
    postagger.load(ps_model_path)
    postags = postagger.postag(words)
    for k, v in dict(zip(words, postags)).items():
        print(k, v)
    # print(' ## '.join(postags))

    parser = Parser()
    parser.load(pr_model_path)
    arcs = parser.parse(words, postags)
    print(' '.join('%d:%s ' % (arc.head, arc.relation) for arc in arcs))
    print('#' * 8)

    recognizer = NamedEntityRecognizer()
    recognizer.load(ner_model_path)
    netag = recognizer.recognize(words, postags)
    for word, ntag in zip(words, netag):
        if ntag != 'O':
            print(word + ' / ' + ntag)  # note: ntag (single tag), not the full netag sequence
    print(' / '.join(netag))

    # Named entity recognition on a pre-segmented, pre-tagged example
    word_list = ['欧几里得', '是', '西元前', '三', '世纪', '的', '希腊', '数学家', '。']
    postags_list = ['nh', 'v', 'nt', 'm', 'n', 'u', 'ns', 'n', 'wp']
    nertags = recognizer.recognize(word_list, postags_list)
    for word, ntag in zip(word_list, nertags):
        if ntag != 'O':
            print(word + '/' + ntag)
    # print(" ".join(word_list))
    print(' '.join(nertags))

    segmentor.release()
    postagger.release()
    parser.release()
    recognizer.release()

def __init__(self):
    LTP_DIR = '/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/'
    self.segmentor = Segmentor()
    self.segmentor.load(LTP_DIR + 'cws.model')
    self.postagger = Postagger()
    self.postagger.load(LTP_DIR + 'pos.model')
    self.parser = Parser()
    self.parser.load(LTP_DIR + 'parser.model')
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(LTP_DIR + 'ner.model')
    self.labeller = SementicRoleLabeller()
    self.labeller.load(LTP_DIR + 'pisrl.model')

def generate_ltp_results():
    """
    Read train_base.json; run word segmentation, POS tagging, and NER
    (dependency parsing and semantic role labelling can be added as needed),
    then return data (the source records) along with lists of processed results.
    :return:
    """
    modelpath = '../../../ltp_data/data/'
    data = read_file_in_ltp('../data/train_base.json')
    sentences = list(map(lambda x: x['content'], data))

    # Word segmentation
    segmentor = Segmentor()
    segmentor.load(modelpath + 'cws.model')
    segmented = [list(segmentor.segment(x.lower().replace(' ', '_'))) for x in sentences]
    segmentor.release()

    # POS tagging
    postagger = Postagger()
    postagger.load(modelpath + 'pos.model')
    posed = [list(postagger.postag(x)) for x in segmented]
    postagger.release()

    # Named entity recognition
    recognizer = NamedEntityRecognizer()
    recognizer.load(modelpath + 'ner.model')
    nered = [list(recognizer.recognize(x, posed[i])) for (i, x) in enumerate(segmented)]
    recognizer.release()

    # Dependency parsing
    # todo: the parse result is a tree, which cannot be concatenated onto the embedding directly -- is there a way?
    # todo: still needed, because dependency parsing is a prerequisite for semantic role labelling
    parser = Parser()
    parser.load(modelpath + 'parser.model')
    arcs = [list(parser.parse(x, posed[i])) for (i, x) in enumerate(segmented)]
    parser.release()

    # Semantic role labelling
    # srl_labeller = SementicRoleLabeller()
    # srl_labeller.load(modelpath + 'pisrl_win.model')
    # roles = [list(srl_labeller.label(x, posed[i], arcs[i])) for (i, x) in enumerate(segmented[:500])]
    # srl_labeller.release()
    # pickle.dump(roles, open('roles0-500.pk', 'wb'))
    # print('1\n')
    # print_role(roles[0], segmented[0])
    # print('\n2\n')
    # print_role(roles[1], segmented[1])
    # pickle.dump([segmented, posed, nered, arcs, roles], open('segmented_posed_nered_roles.pk', 'wb'))

    return data, segmented, posed, nered, arcs

def get_models():
    LTP_DATA_DIR = r'ltp_data_v3.4.0'  # path to the LTP model directory

    # Word segmentation
    segmentor = Segmentor()  # initialize
    segmentor.load(os.path.join(LTP_DATA_DIR, 'cws.model'))  # load the model

    # POS tagging
    postagger = Postagger()  # initialize
    postagger.load(os.path.join(LTP_DATA_DIR, 'pos.model'))  # load the model

    parser = Parser()
    parser.load(os.path.join(LTP_DATA_DIR, 'parser.model'))

    return segmentor, postagger, parser

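One way the returned models might be wired together; a minimal sketch using standard pyltp calls, where the sample sentence is an assumption:

# Hedged usage sketch for get_models(); the sentence is illustrative only.
segmentor, postagger, parser = get_models()
words = segmentor.segment('中国是世界上老年人口最多的国家吗')  # segment a raw sentence
postags = postagger.postag(words)                              # tag the segmented words
arcs = parser.parse(words, postags)                            # parse into dependency arcs
print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
# Release the native models once all results have been consumed.
for model in (segmentor, postagger, parser):
    model.release()
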
def __init__(self):
    self.__clause_list = []
    self.__subclause_dict = {}
    self.__triple_list = []
    self.__segmentor = Segmentor()
    self.__postagger = Postagger()
    self.__recognizer = NamedEntityRecognizer()
    self.__parser = Parser()
    self.__labeller = SementicRoleLabeller()
    self.__words_full_list = []
    self.__netags_full_list = []

def __init__(self):
    cws_model_path = os.path.join(config.LTP_DATA_DIR, 'cws.model')
    self.segmentor = Segmentor()
    self.segmentor.load_with_lexicon(cws_model_path, config.dic_path)
    pos_model_path = os.path.join(config.LTP_DATA_DIR, 'pos.model')
    self.postagger = Postagger()
    self.postagger.load(pos_model_path)
    par_model_path = os.path.join(config.LTP_DATA_DIR, 'parser.model')
    self.parser = Parser()
    self.parser.load(par_model_path)

def parsing(self, sentence):
    words = list(self.pyltp_cut(sentence))  # pyltp word segmentation
    # words = list(jieba.cut(sentence))  # jieba word segmentation
    postags = list(self.postagger.postag(words))  # POS tagging
    # tmp = [str(k + 1) + '-' + v for k, v in enumerate(words)]
    # print('\t'.join(tmp))
    parser = Parser()  # initialize the parser instance
    parser.load(par_model_path)  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    parser.release()  # release the model
    return arcs

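Loading and releasing parser.model on every call is the expensive part here. A sketch of the same method with the parser cached on the instance, assuming the surrounding class has an __init__ where its other models are loaded, as the other classes in this collection do:

# Hypothetical refactor: load the parser once in __init__, reuse it across calls.
def __init__(self):
    self.parser = Parser()
    self.parser.load(par_model_path)

def parsing(self, sentence):
    words = list(self.pyltp_cut(sentence))        # pyltp word segmentation
    postags = list(self.postagger.postag(words))  # POS tagging
    return self.parser.parse(words, postags)      # dependency parsing
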
def __init__(self, data_dir: str):
    self.segmentor = Segmentor()
    self.segmentor.load(os.path.join(data_dir, "cws.model"))
    self.postagger = Postagger()
    self.postagger.load(os.path.join(data_dir, "pos.model"))
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(os.path.join(data_dir, "ner.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(data_dir, "parser.model"))
    self.labeller = SementicRoleLabeller()
    self.labeller.load(os.path.join(data_dir, "pisrl.model"))

def __init__(self, model_path):
    self.model_path = model_path
    self.segmentor = Segmentor()  # segmenter: initialize the instance
    self.segmentor.load_with_lexicon(path.join(self.model_path, 'cws.model'),
                                     path.join(self.model_path, 'dictionary_kfc.txt'))
    self.postagger = Postagger()  # POS tagger: initialize the instance
    self.postagger.load(path.join(self.model_path, 'pos.model'))  # load the model
    self.recognizer = NamedEntityRecognizer()  # NER: initialize the instance
    self.recognizer.load(path.join(self.model_path, 'ner.model'))
    self.parser = Parser()  # dependency parser: initialize the instance
    self.parser.load(path.join(self.model_path, 'parser.model'))  # load the model
    self.labeller = SementicRoleLabeller()  # semantic role labelling: initialize the instance
    self.labeller.load(path.join(self.model_path, 'srl'))

def __init__(self):
    # Initialize the instances and load the models.
    self.segmentor = Segmentor()  # initialize the instance
    self.segmentor.load_with_lexicon(
        self.cws_model_path,
        self.special_word_path)  # load the model; the second argument is the path to your external lexicon file
    self.postagger = Postagger()
    self.postagger.load(self.pos_model_path)
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(self.ner_model_path)
    self.parser = Parser()
    self.parser.load(self.parser_model_path)

def __init__(self):
    self.nr_table_name = ''
    self.nrTable = None
    # POS tagging
    pos_model_path = os.path.join(os.path.dirname(__file__), '../data/ltp_data/pos.model')
    self.postagger = Postagger()
    # self.postagger.load(pos_model_path)
    # Dependency parsing
    par_model_path = os.path.join(os.path.dirname(__file__), '../data/ltp_data/parser.model')
    self.parser = Parser()
    self.parser.load(par_model_path)

def __init__(self):
    self.postagger = Postagger()
    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
    self.postagger.load(pos_model_path)
    self.recognizer = NamedEntityRecognizer()
    ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')
    self.recognizer.load(ner_model_path)
    self.parser = Parser()
    par_model_path = os.path.join(LTP_DATA_DIR, 'parser.model')
    self.parser.load(par_model_path)

def parse(words, postags):
    parser = Parser()  # initialize the parser instance
    parser.load('/Users/zhangqinyuan/Downloads/ltp_data_v3.4.0/parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    # print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    # Print each word's 1-based index, its head index, and its relation.
    for i, arc in enumerate(arcs, start=1):
        print(str(i) + '/' + str(arc.head) + '/' + str(arc.relation))
    parser.release()  # release the model
    return arcs

def _load_dataset(self):
    """
    Load the dataset: training set and validation set.
    :return:
    """
    par_model_path = os.path.join(self.ltp_dir, 'parser.model')
    pos_model_path = os.path.join(self.ltp_dir, 'pos.model')
    postagger = Postagger()
    postagger.load(pos_model_path)
    parser = Parser()
    parser.load(par_model_path)

    examples = []
    pkl_path = os.path.join(
        self.data_dir,
        self.file_name.split('.')[0] + '_{}_.pkl'.format(self.class_id))
    if not os.path.exists(pkl_path):
        with open(os.path.join(self.data_dir, self.file_name)) as f:
            for l in tqdm(f):
                l = json.loads(l)
                # Segmentation / POS / NER: the Chinese NER model (BERT) is
                # character-level, so list() turns strings into character lists.
                # The output format is (entity, type, begin, end).
                text_seg = jieba.lcut(l['text'], HMM=False)
                poses = ' '.join(postagger.postag(text_seg)).split()
                arcs = parser.parse(text_seg, poses)
                arcses = ' '.join("%d:%s" % (arc.head, arc.relation) for arc in arcs).split()
                example = self.align_bert(l, text_seg, arcses)
                if len(example.events) == 0:
                    continue
                examples.append(example)
        with open(pkl_path, 'wb') as f:
            pickle.dump(examples, f)
        print('saved {}'.format(pkl_path))
    else:
        with open(pkl_path, 'rb') as f:
            examples = pickle.load(f)
    return examples

def __init__(self):
    LTP_DIR = "./ltp_data"
    self.segmentor = Segmentor()
    self.segmentor.load(os.path.join(LTP_DIR, "cws.model"))
    self.postagger = Postagger()
    self.postagger.load(os.path.join(LTP_DIR, "pos.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(LTP_DIR, "parser.model"))
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(os.path.join(LTP_DIR, "ner.model"))

def __init__(self, dict_path=None):
    super(DepTree, self).__init__()
    print("Loading LTP models... ...")
    self.segmentor = Segmentor()
    if dict_path is None:
        self.segmentor.load(os.path.join(MODELDIR, "cws.model"))
    else:
        self.segmentor.load_with_lexicon(os.path.join(MODELDIR, "cws.model"), dict_path)
    self.postagger = Postagger()
    self.postagger.load(os.path.join(MODELDIR, "pos.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(MODELDIR, "parser.model"))
    print("Models loaded.")

def __init__(self, component_config: Dict[Text, Any] = None):
    super(LanguageAnalysis, self).__init__(component_config)
    self.dimensions = component_config['dimensions']
    ltp_path = component_config.get('ltp_path')
    self.postagger = Postagger()
    self.postagger.load(os.path.join(ltp_path, "pos.model"))
    self.parser = Parser()
    self.parser.load(os.path.join(ltp_path, "parser.model"))
    self.recognizer = NamedEntityRecognizer()
    self.recognizer.load(os.path.join(ltp_path, "ner.model"))

def parse(sentence):
    """LTP dependency parsing."""
    parser = Parser()
    parser.load('/Users/zt/Documents/ltp_data/parser.model')
    words = segmentor(sentence)
    postags = posttagger(sentence)
    arcs = parser.parse(words, postags)
    res = [(arc.head, arc.relation) for arc in arcs]
    for i in range(len(res)):
        # head == 0 marks the pseudo-root; indexing words[-1] there would
        # wrongly print the last word, so label it ROOT instead.
        head = res[i][0]
        head_word = 'ROOT' if head == 0 else words[head - 1]
        print(words[i], '---', res[i][1], '-->', head_word)
    parser.release()

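parse() above calls segmentor() and posttagger() helpers that are not defined in this snippet. A plausible sketch, where the model paths mirror the parser.model path above and are assumptions:

# Hypothetical helpers assumed by parse(); paths mirror the parser path above.
from pyltp import Segmentor, Postagger

def segmentor(sentence):
    seg = Segmentor()
    seg.load('/Users/zt/Documents/ltp_data/cws.model')
    words = list(seg.segment(sentence))  # copy before releasing the model
    seg.release()
    return words

def posttagger(sentence):
    words = segmentor(sentence)  # re-segment so tags align with the same words
    tagger = Postagger()
    tagger.load('/Users/zt/Documents/ltp_data/pos.model')
    postags = list(tagger.postag(words))
    tagger.release()
    return postags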