Example #1
def parse(words, postags):
    parser = Parser()  # initialize instance
    parser.load('../ltp_data/parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    # print('----------------')
    # print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    # print('----------------')
    parser.release()  # release the model
    return arcs
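A minimal usage sketch for the function above (our addition): it assumes the same ../ltp_data directory also holds cws.model and pos.model, and that words/postags come from pyltp's Segmentor and Postagger. Note that parse() releases the parser before returning the arcs; this usually works, but if your pyltp build invalidates the arcs on release(), copy them to plain values inside parse() as Examples #4 and #8 do.

from pyltp import Segmentor, Postagger, Parser

segmentor = Segmentor()
segmentor.load('../ltp_data/cws.model')  # assumed path
postagger = Postagger()
postagger.load('../ltp_data/pos.model')  # assumed path

words = list(segmentor.segment('他叫汤姆去拿外衣。'))
postags = list(postagger.postag(words))
arcs = parse(words, postags)
print('\t'.join('%d:%s' % (arc.head, arc.relation) for arc in arcs))

segmentor.release()
postagger.release()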
Example #2
 def get_dependency(self, words):
     # dependency parsing
     postags = self.get_postags(words)
     parser = Parser()  # initialize instance
     parser.load(self.par_model_path)  # load the model
     arcs = parser.parse(words, postags)  # dependency parsing
     print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
     parser.release()  # release the model
     return arcs
Example #3
    def __init__(
        self,
        word_pattern_file="H:\\new_github_workspace\\Syptom_Knowledge_Graph_309\\test_result.csv"
    ):

        self.word_pattern_file = word_pattern_file
        self.segmentor = Segmentor()
        self.postagger = Postagger()
        self.parser = Parser()
        self.recognizer = NamedEntityRecognizer()
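Note that this constructor only instantiates the pyltp components; each one still needs a load() call before use. A sketch of the missing loading step, assuming the models sit in a local ltp_data directory (helper name and path are ours):

    def load_models(self, model_dir='ltp_data'):
        self.segmentor.load(os.path.join(model_dir, 'cws.model'))
        self.postagger.load(os.path.join(model_dir, 'pos.model'))
        self.parser.load(os.path.join(model_dir, 'parser.model'))
        self.recognizer.load(os.path.join(model_dir, 'ner.model'))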
Example #4
def parse_arcs(words, postags):
    """ ltp 依存句法分析 """
    parser_model_path = os.path.join(LTP_TOP_DIR, 'parser.model')
    parser = Parser()
    parser.load(parser_model_path)

    arcs = parser.parse(words, postags)
    parser.release()
    # return arcs
    return [(arc.head, arc.relation) for arc in arcs]
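Because this version copies each arc into a plain (head, relation) tuple before release(), the result can be used safely after the model is freed. A sketch (helper name ours) of rendering those pairs against the original tokens; arc.head is 1-based, with 0 denoting the virtual root:

def print_arc_pairs(words, pairs):
    for word, (head, rel) in zip(words, pairs):
        head_word = 'ROOT' if head == 0 else words[head - 1]
        print('%s --%s--> %s' % (word, rel, head_word))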
Example #5
 def __init__(self):
     CUR_DIR = os.getcwd()  # '/'.join(os.path.abspath(__file__).split('/')[:-1])
     LTP_DIR = os.path.join(CUR_DIR, "ltp_data")
     self.postagger = Postagger()
     self.postagger.load(os.path.join(LTP_DIR, "pos.model"))
     self.parser = Parser()
     self.parser.load(os.path.join(LTP_DIR, "parser.model"))
     print(os.path.join(LTP_DIR, "pos.model"))
     print(os.path.join(LTP_DIR, "parser.model"))
Example #6
def parse(words, postags):
    parser = Parser()  # initialize instance
    ltp_model_loader.load(parser)
    # parser.load('C:\\Users\\72770\\Documents\\Chatbot\\ltp-data-v3.3.1\\ltp_data\\parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing

    print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    # print(len(arcs))
    parser.release()  # release the model
    return arcs
Example #7
 def __init__(self):
     self.postagger = Postagger()
     self.parser = Parser()
     self.parser.load(par_model_path)
     self.recognizer = NamedEntityRecognizer()
     self.recognizer.load(ner_model_path)
     self.labeller = SementicRoleLabeller()
     self.labeller.load(srl_model_path)
     self.postagger.load_with_lexicon(pos_model_path,
                                      '/home/wangwei/conf/posttags.txt')
Example #8
 def get_arcs_by_pyltp(self, words_list, postags_list):
     # dependency parser model path; the model file is named 'parser.model'
     par_model_path = os.path.join(self.ltp_dir_path, "parser.model")
     parser = Parser()
     parser.load(par_model_path)
     arcs = parser.parse(words_list, postags_list)
     arcs_list = list(arcs)  # copy before releasing the model
     parser.release()
     return arcs_list
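A variant we would suggest on top of this: extract plain Python values instead of pyltp proxy objects, so nothing in the returned list can reference parser-owned memory after release() (function name ours):

 def get_arc_pairs_by_pyltp(self, words_list, postags_list):
     par_model_path = os.path.join(self.ltp_dir_path, "parser.model")
     parser = Parser()
     parser.load(par_model_path)
     # plain ints and strs survive the release below
     pairs = [(arc.head, arc.relation) for arc in parser.parse(words_list, postags_list)]
     parser.release()
     return pairs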
Example #9
 def __init__(self):
     self.segmentor = Segmentor()
     self.segmentor.load("model/cws.model")
     # self.segmentor.load_with_lexicon("model/cws.model", 'dict/segdict.txt')  # load the model; the second argument is your external dictionary file path
     self.postagger = Postagger()  # initialize instance
     self.postagger.load('model/pos.model')  # load the model
     self.parser = Parser()  # initialize instance
     self.parser.load('model/parser.model')  # load the model
     self.recognizer = NamedEntityRecognizer()  # initialize instance
     self.recognizer.load('model/ner.model')
Example #10
    def __init__(self):
        LTP_DIR = "ltp_data_v3.4.0"
        self.segmentor = Segmentor()
        self.segmentor.load(os.path.join(LTP_DIR, "cws.model"))

        self.postagger = Postagger()
        self.postagger.load(os.path.join(LTP_DIR, "pos.model"))

        self.parser = Parser()
        self.parser.load(os.path.join(LTP_DIR, "parser.model"))
Example #11
def ltp_parser_data_test():
    LTP_DATA_DIR = r'D:\BaiduNetdiskDownload\ltp_data_v3.4.0'  # path to the LTP model directory (raw string so the backslashes stay literal)

    cws_model_path = os.path.join(LTP_DATA_DIR, 'cws.model')  # segmentation model path; the model file is named `cws.model`
    from pyltp import Segmentor
    segmentor = Segmentor()  # initialize instance
    segmentor.load(cws_model_path)  # load the model

    pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')  # POS tagging model path; the model file is named `pos.model`

    from pyltp import Postagger
    postagger = Postagger()  # initialize instance
    postagger.load(pos_model_path)  # load the model

    par_model_path = os.path.join(LTP_DATA_DIR, 'parser.model')  # dependency parser model path; the model file is named `parser.model`
    from pyltp import Parser
    parser = Parser()  # initialize instance
    parser.load(par_model_path)  # load the model

    sents = [
        'API的全称是什么?', '中国是世界上老年人口最多的国家吗', '艾滋病是如何传染的', '1999年NBA总冠军是哪支球队',
        'Flash是哪个公司的产品'
    ]
    for sent in sents:
        print(sent)
        words = segmentor.segment(sent)
        print(list(words))
        postags = postagger.postag(words)
        print(list(postags))
        arcs = parser.parse(words, postags)  # dependency parsing
        print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
        res = []
        # collect the root word (head == 0) and its direct children, skipping punctuation (WP)
        for i in range(len(arcs)):
            if arcs[i].head == 0:
                for j in range(len(arcs)):
                    # if (arcs[j].head == i + 1 or arcs[j].head == 0 or arcs[j].relation == 'ADV') and arcs[j].relation != 'WP':
                    if (arcs[j].head == i + 1
                            or arcs[j].head == 0) and arcs[j].relation != 'WP':
                        res.append(j)
        print(res)
        # pull in ATT (attribute) modifiers of the kept words; appending while
        # iterating makes the search transitive
        for j in res:
            for i in range(len(arcs)):
                if arcs[i].head == j + 1 and arcs[i].relation == 'ATT':
                    res.append(i)
        res = list(set(res))
        res.sort()
        print(res)
        print(' '.join([words[i] for i in res]) + '\n')

    segmentor.release()  # release the model
    postagger.release()  # release the model
    parser.release()  # release the model
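The extraction above keeps the root word (head == 0), its direct non-punctuation children, and, transitively, their ATT modifiers. An equivalent, more compact formulation under the same conventions (helper name ours; arc.head is 1-based, 0 marks the root):

def root_phrase(words, arcs):
    root = next(i for i, a in enumerate(arcs) if a.head == 0)
    keep = {j for j, a in enumerate(arcs)
            if a.head in (0, root + 1) and a.relation != 'WP'}
    queue = sorted(keep)
    while queue:  # worklist: pull in ATT modifiers transitively
        j = queue.pop()
        for i, a in enumerate(arcs):
            if a.head == j + 1 and a.relation == 'ATT' and i not in keep:
                keep.add(i)
                queue.append(i)
    return ' '.join(words[i] for i in sorted(keep))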
Example #12
    def __init__(self):
        self.nr_table_name = ''
        # POS tagging
        self.postagger = Postagger()
        self.postagger.load('data/ltp_data/pos.model')

        # dependency parsing
        self.parser = Parser()
        self.parser.load('data/ltp_data/parser.model')

        self.faker_speakers = open('data/model_data/fake_speakers').read().strip().split(',')
Example #13
def name_recognize_one():
    import sys, os
    import pyltp
    from pyltp import SentenceSplitter, Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller

    paragraph = '叙利亚东古塔地区。7日发生疑似化学武器袭击事件,导致70余人丧生。报道一出,叙利亚反对派、美国、英国、法国等纷纷指责叙政府军使用化学武器袭击无辜平民。但叙利亚坚决否认,并指责西方和叙反对派造谣,目的是保护被围困的恐怖分子。俄外交部则认为,该谣言旨在袒护恐怖分子,并为外部势力发动打击寻找借口。'

    sentence = SentenceSplitter.split(paragraph)[1]
    print('split {}'.format(sentence))
    # sentence splitting
    #     for i in sentence:
    #         print(i)
    #         print()
    segmentor = Segmentor()
    segmentor.load(sg_model_path)
    words = segmentor.segment(sentence)
    print('|'.join(words))

    postagger = Postagger()
    postagger.load(ps_model_path)
    postags = postagger.postag(words)
    for k, v in dict(zip(words, postags)).items():
        print(k, v)

    # print(' ## '.join(postags))
    parser = Parser()
    parser.load(pr_model_path)
    arcs = parser.parse(words, postags)
    print(' '.join('%d:%s ' % (arc.head, arc.relation) for arc in arcs))

    print('#' * 8)
    recognizer = NamedEntityRecognizer()
    recognizer.load(ner_model_path)
    netag = recognizer.recognize(words, postags)
    for word, ntag in zip(words, netag):
        if ntag != 'O':
            # print('ntag')
            print(word + ' / ' + ntag)
    print(' / '.join(netag))

    # named entity recognition
    word_list = ['欧几里得', '是', '西元前', '三', '世纪', '的', '希腊', '数学家', '。']
    postags_list = ['nh', 'v', 'nt', 'm', 'n', 'u', 'ns', 'n', 'wp']
    nertags = recognizer.recognize(word_list, postags_list)
    for word, ntag in zip(word_list, nertags):
        if ntag != 'O':
            print(word + '/' + ntag)
    #print (" ".join(word_list))
    print(' '.join(nertags))

    segmentor.release()
    postagger.release()
    parser.release()
    recognizer.release()
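The printed NE tags follow LTP's position-type scheme: O for non-entities, otherwise B/I/E (begin/inside/end of a multi-token entity) or S (single token) joined to a type (Nh person, Ni organization, Ns place). A small sketch (helper name ours) that merges them back into whole entities:

def group_entities(words, netags):
    entities, buf, typ = [], [], None
    for word, tag in zip(words, netags):
        if tag == 'O':
            buf = []
            continue
        pos, typ = tag.split('-')
        buf = [word] if pos in ('B', 'S') else buf + [word]
        if pos in ('E', 'S'):
            entities.append((''.join(buf), typ))
    return entities

For instance, if the model tags 欧几里得 as S-Nh in the sentence above, this returns ('欧几里得', 'Nh').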
Example #14
 def __init__(self):
     self.segmentor = Segmentor()
     self.segmentor.load('/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/cws.model')
     self.postagger = Postagger()
     self.postagger.load('/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/pos.model')
     self.parser = Parser()
     self.parser.load('/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/parser.model')
     self.recognizer = NamedEntityRecognizer()
     self.recognizer.load('/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/ner.model')
     self.labeller = SementicRoleLabeller()
     self.labeller.load('/home/student/project/project-01/Four-Little-Frogs/ltp_data_v3.4.0/pisrl.model')
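This class loads SementicRoleLabeller (the spelling is pyltp's own) but, like several examples here, never shows a call. A minimal sketch of how label() is typically used with the components above (method name ours):

     def label_roles(self, sentence):
         words = self.segmentor.segment(sentence)
         postags = self.postagger.postag(words)
         arcs = self.parser.parse(words, postags)
         roles = self.labeller.label(words, postags, arcs)
         for role in roles:
             # role.index is the predicate's token index; each argument has a
             # role name and a token span [start, end]
             print(words[role.index], ' '.join(
                 '%s:(%d,%d)' % (arg.name, arg.range.start, arg.range.end)
                 for arg in role.arguments))
         return roles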
Example #15
def generate_ltp_results():
    """
    读取train_base.json
    分词,词性标注,NER,(依存,语义角色按需加入)
    然后返回data(源数据),以及处理好的结果的list
    :return:
    """
    modelpath = '../../../ltp_data/data/'

    data = read_file_in_ltp('../data/train_base.json')
    sentences = list(map(lambda x: x['content'], data))


    # word segmentation
    segmentor = Segmentor()
    segmentor.load(modelpath + 'cws.model')
    segmented = [list(segmentor.segment(x.lower().replace(' ', '_'))) for x in sentences]
    segmentor.release()

    # POS tagging
    postagger = Postagger()
    postagger.load(modelpath + 'pos.model')
    posed = [list(postagger.postag(x)) for x in segmented]
    postagger.release()

    # named entity recognition
    recognizer = NamedEntityRecognizer()
    recognizer.load(modelpath + 'ner.model')
    nered = [list(recognizer.recognize(x, posed[i])) for (i, x) in enumerate(segmented)]
    recognizer.release()

    # dependency parsing. TODO: the parse result is a tree, so it cannot be concatenated onto the embedding directly; is there a way around this?
    # TODO: still needed, because dependency parsing is a prerequisite for semantic role labelling
    parser = Parser()
    parser.load(modelpath + 'parser.model')
    arcs = [list(parser.parse(x, posed[i])) for (i, x) in enumerate(segmented)]
    parser.release()


    # semantic role labelling
    # srl_labeller = SementicRoleLabeller()
    # srl_labeller.load(modelpath + 'pisrl_win.model')
    #
    # roles = [list(srl_labeller.label(x, posed[i], arcs[i])) for (i, x) in enumerate(segmented[:500])]
    # srl_labeller.release()
    # pickle.dump(roles, open('roles0-500.pk', 'wb'))
    #
    # print('1\n')
    # print_role(roles[0], segmented[0])
    # print('\n2\n')
    # print_role(roles[1], segmented[1])

    # pickle.dump([segmented, posed, nered, arcs, roles], open('segmented_posed_nered_roles.pk', 'wb'))
    return data, segmented, posed, nered, arcs
Example #16
def get_models():
    LTP_DATA_DIR = r'ltp_data_v3.4.0'  # LTP model directory path
    # segmentation
    segmentor = Segmentor()  # initialize
    segmentor.load(os.path.join(LTP_DATA_DIR, 'cws.model'))  # load the model
    # POS tagging
    postagger = Postagger()  # initialize
    postagger.load(os.path.join(LTP_DATA_DIR, 'pos.model'))  # load the model
    parser = Parser()
    parser.load(os.path.join(LTP_DATA_DIR, 'parser.model'))
    return segmentor, postagger, parser
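A usage sketch for get_models() (the sentence and printout are illustrative only):

segmentor, postagger, parser = get_models()
words = segmentor.segment('他叫汤姆去拿外衣。')
postags = postagger.postag(words)
arcs = parser.parse(words, postags)
print('\t'.join('%d:%s' % (arc.head, arc.relation) for arc in arcs))
for model in (segmentor, postagger, parser):
    model.release()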
Example #17
 def __init__(self):
     self.__clause_list = []
     self.__subclause_dict = {}
     self.__triple_list = []
     self.__segmentor = Segmentor()
     self.__postagger = Postagger()
     self.__recognizer = NamedEntityRecognizer()
     self.__parser = Parser()
     self.__labeller = SementicRoleLabeller()
     self.__words_full_list = []
     self.__netags_full_list = []
Example #18
 def __init__(self):
     cws_model_path = os.path.join(config.LTP_DATA_DIR, 'cws.model')
     self.segmentor = Segmentor()
     self.segmentor.load_with_lexicon(cws_model_path, config.dic_path)
     pos_model_path = os.path.join(config.LTP_DATA_DIR, 'pos.model')
     self.postagger = Postagger()
     self.postagger.load(pos_model_path)
     par_model_path = os.path.join(config.LTP_DATA_DIR,
                                   'parser.model')
     self.parser = Parser()
     self.parser.load(par_model_path)
Example #19
 def parsing(self, sentence):
     words = list(self.pyltp_cut(sentence))  # pyltp segmentation
     # words = list(jieba.cut(sentence))  # jieba segmentation
     postags = list(self.postagger.postag(words))  # POS tagging
     # tmp = [str(k + 1) + '-' + v for k, v in enumerate(words)]
     # print('\t'.join(tmp))
     parser = Parser()  # initialize instance
     parser.load(par_model_path)  # load the model
     arcs = parser.parse(words, postags)  # dependency parsing
     parser.release()  # release the model
     return arcs
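Loading parser.model on every parsing() call is costly, since load() dominates the runtime for a large model file. A variant we would suggest, under the same par_model_path assumption: load the parser once next to self.postagger and reuse it.

     def __init__(self):
         self.postagger = Postagger()
         self.postagger.load(pos_model_path)  # assumed, as elsewhere in this class
         self.parser = Parser()
         self.parser.load(par_model_path)     # loaded once, reused by parsing()

     def parsing(self, sentence):
         words = list(self.pyltp_cut(sentence))
         postags = list(self.postagger.postag(words))
         return self.parser.parse(words, postags)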
Example #20
 def __init__(self, data_dir: str):
     self.segmentor = Segmentor()
     self.segmentor.load(os.path.join(data_dir, "cws.model"))
     self.postagger = Postagger()
     self.postagger.load(os.path.join(data_dir, "pos.model"))
     self.recognizer = NamedEntityRecognizer()
     self.recognizer.load(os.path.join(data_dir, "ner.model"))
     self.parser = Parser()
     self.parser.load(os.path.join(data_dir, "parser.model"))
     self.labeller = SementicRoleLabeller()
     self.labeller.load(os.path.join(data_dir, "pisrl.model"))
Example #21
 def __init__(self, model_path):
     self.model_path = model_path
     self.segmentor = Segmentor()  # segmentation instance
     self.segmentor.load_with_lexicon(path.join(self.model_path, 'cws.model'), path.join(self.model_path, 'dictionary_kfc.txt'))
     self.postagger = Postagger()  # POS tagging instance
     self.postagger.load(path.join(self.model_path, 'pos.model'))  # load the model
     self.recognizer = NamedEntityRecognizer()  # named entity recognition instance
     self.recognizer.load(path.join(self.model_path, 'ner.model'))
     self.parser = Parser()  # dependency parsing instance
     self.parser.load(path.join(self.model_path, 'parser.model'))  # load the model
     self.labeller = SementicRoleLabeller()  # semantic role labelling instance
     self.labeller.load(path.join(self.model_path, 'srl'))
Example #22
 def __init__(self):
     # initialize instances and load models
     self.segmentor = Segmentor()  # initialize instance
     self.segmentor.load_with_lexicon(
         self.cws_model_path,
         self.special_word_path)  # load the model; the second argument is your external dictionary file path
     self.postagger = Postagger()
     self.postagger.load(self.pos_model_path)
     self.recognizer = NamedEntityRecognizer()
     self.recognizer.load(self.ner_model_path)
     self.parser = Parser()
     self.parser.load(self.parser_model_path)
Example #23
    def __init__(self):
        self.nr_table_name = ''
        self.nrTable = None
        # POS tagging
        pos_model_path = os.path.join(os.path.dirname(__file__), '../data/ltp_data/pos.model')
        self.postagger = Postagger()
        # self.postagger.load(pos_model_path)

        # dependency parsing
        par_model_path = os.path.join(os.path.dirname(__file__), '../data/ltp_data/parser.model')
        self.parser = Parser()
        self.parser.load(par_model_path)
Example #24
    def __init__(self):
        self.postagger = Postagger()
        pos_model_path = os.path.join(LTP_DATA_DIR, 'pos.model')
        self.postagger.load(pos_model_path)

        self.recognizer = NamedEntityRecognizer()
        ner_model_path = os.path.join(LTP_DATA_DIR, 'ner.model')
        self.recognizer.load(ner_model_path)

        self.parser = Parser()
        par_model_path = os.path.join(LTP_DATA_DIR, 'parser.model')
        self.parser.load(par_model_path)
Example #25
def parse(words, postags):
    parser = Parser()  # initialize instance
    parser.load(
        '/Users/zhangqinyuan/Downloads/ltp_data_v3.4.0/parser.model')  # load the model
    arcs = parser.parse(words, postags)  # dependency parsing
    # print("\t".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))
    for i, arc in enumerate(arcs, start=1):
        print(str(i) + '/' + str(arc.head) + '/' + str(arc.relation))
    parser.release()  # release the model
    return arcs
Example #26
    def _load_dataset(self):
        """
        加载数据集:训练集,验证集
        :return:
        """
        par_model_path = os.path.join(self.ltp_dir, 'parser.model')
        pos_model_path = os.path.join(self.ltp_dir, 'pos.model')
        postagger = Postagger()
        postagger.load(pos_model_path)
        parser = Parser()
        parser.load(par_model_path)

        examples = []
        pkl_path = os.path.join(
            self.data_dir,
            self.file_name.split('.')[0] + '_{}_.pkl'.format(self.class_id))
        if not os.path.exists(pkl_path):
            with open(os.path.join(self.data_dir, self.file_name)) as f:
                for l in tqdm(f):
                    l = json.loads(l)
                    # segmentation / POS / NER: the Chinese NER model (BERT) is character-level, so list() turns a string into a character list; the output format is (entity, type, begin, end)
                    text_seg = jieba.lcut(l['text'], HMM=False)
                    poses = ' '.join(postagger.postag(text_seg)).split()
                    arcs = parser.parse(text_seg, poses)
                    arcses = ' '.join("%d:%s" % (arc.head, arc.relation)
                                      for arc in arcs).split()

                    example = self.align_bert(l, text_seg, arcses)
                    if len(example.events) == 0:
                        continue
                    examples.append(example)
            with open(pkl_path, 'wb') as f:
                pickle.dump(examples, f)
                print('saved {}'.format(pkl_path))
        else:
            with open(pkl_path, 'rb') as f:
                examples = pickle.load(f)

        postagger.release()  # free the models before returning
        parser.release()
        return examples
Example #27
    def __init__(self):
        LTP_DIR = "./ltp_data"
        self.segmentor = Segmentor()
        self.segmentor.load(os.path.join(LTP_DIR, "cws.model"))

        self.postagger = Postagger()
        self.postagger.load(os.path.join(LTP_DIR, "pos.model"))

        self.parser = Parser()
        self.parser.load(os.path.join(LTP_DIR, "parser.model"))

        self.recognizer = NamedEntityRecognizer()
        self.recognizer.load(os.path.join(LTP_DIR, "ner.model"))
Example #28
 def __init__(self, dict_path=None):
     super(DepTree, self).__init__()
     print("正在加载LTP模型... ...")
     self.segmentor = Segmentor()
     if dict_path is None:
         self.segmentor.load(os.path.join(MODELDIR, "cws.model"))
     else:
         self.segmentor.load_with_lexicon(os.path.join(MODELDIR, "cws.model"), dict_path)
     self.postagger = Postagger()
     self.postagger.load(os.path.join(MODELDIR, "pos.model"))
     self.parser = Parser()
     self.parser.load(os.path.join(MODELDIR, "parser.model"))
     print("加载模型完毕。")
Example #29
    def __init__(self, component_config: Dict[Text, Any] = None):
        super(LanguageAnalysis, self).__init__(component_config)
        self.dimensions = component_config['dimensions']
        ltp_path = component_config.get('ltp_path')

        self.postagger = Postagger()
        self.postagger.load(os.path.join(ltp_path, "pos.model"))

        self.parser = Parser()
        self.parser.load(os.path.join(ltp_path, "parser.model"))

        self.recognizer = NamedEntityRecognizer()
        self.recognizer.load(os.path.join(ltp_path, "ner.model"))
Example #30
def parse(sentence):
    '''
    LTP dependency parsing
    '''
    parser = Parser()
    parser.load('/Users/zt/Documents/ltp_data/parser.model')
    words = segmentor(sentence)
    postags = posttagger(sentence)
    arcs = parser.parse(words, postags)
    res = [(arc.head, arc.relation) for arc in arcs]
    for i in range(len(res)):
        head = res[i][0]
        # head 0 is the virtual root; words[head - 1] would wrongly wrap to the last word
        head_word = 'ROOT' if head == 0 else words[head - 1]
        print(words[i], '---', res[i][1], '-->', head_word)
    parser.release()