示例#1
0
class pyltp_worker(object):
    """Thin convenience wrapper around the pyltp analysers.

    On construction the four analysers (segmentor, POS tagger, named-entity
    recognizer, dependency parser) are created and their models are loaded
    from ``model_path``. Call :meth:`end` when done to release them.
    """

    # Placeholder reported as the head word / head POS of the token whose
    # dependency arc points at the virtual root (``arc.head == 0``).
    ROOT_LABEL = "ROOT"

    def __init__(self, model_path):
        """Create the analysers and load the models found in ``model_path``.

        Args:
            model_path: Directory containing ``cws.model``, ``pos.model``,
                ``ner.model`` and ``parser.model``.
        """
        self.LTP_MODEL_DIR = model_path
        self.segmentor = Segmentor()  # word segmentation
        self.postagger = Postagger()  # part-of-speech tagging
        self.recognizer = NamedEntityRecognizer()  # named-entity recognition
        self.parser = Parser()  # dependency parsing
        self.load_model()

    def load_model(self):
        """Resolve the four model paths under ``LTP_MODEL_DIR`` and load them."""
        self.cws_model_path = os.path.join(self.LTP_MODEL_DIR, 'cws.model')
        self.pos_model_path = os.path.join(self.LTP_MODEL_DIR, 'pos.model')
        self.ner_model_path = os.path.join(self.LTP_MODEL_DIR, 'ner.model')
        self.par_model_path = os.path.join(self.LTP_MODEL_DIR, 'parser.model')
        self.segmentor.load(self.cws_model_path)
        self.postagger.load(self.pos_model_path)
        self.recognizer.load(self.ner_model_path)
        self.parser.load(self.par_model_path)

    def end(self):
        """Release all four analysers."""
        self.segmentor.release()
        self.postagger.release()
        self.recognizer.release()
        self.parser.release()

    # ---- user dictionaries -------------------------------------------------

    def add_cws_userdict(self, lexicon_path):
        """Reload the segmentation model together with a user lexicon.

        Args:
            lexicon_path: Path of the user dictionary file.
        """
        # pyltp documents load_with_lexicon(model_path, lexicon_path); the
        # model path is required, not just the lexicon.
        # NOTE(review): the already-loaded model is released first so that
        # the lexicon-aware load is not skipped as a redundant reload --
        # confirm against the installed pyltp version.
        self.segmentor.release()
        self.segmentor.load_with_lexicon(self.cws_model_path, lexicon_path)

    def add_pos_userdict(self, lexicon_path):
        """Reload the POS tagging model together with a user lexicon.

        Args:
            lexicon_path: Path of the user dictionary file.
        """
        # Same (model_path, lexicon_path) contract as the segmentor.
        # NOTE(review): release-before-reload as in add_cws_userdict --
        # confirm against the installed pyltp version.
        self.postagger.release()
        self.postagger.load_with_lexicon(self.pos_model_path, lexicon_path)

    def add_ner_userdict(self, lexicon_path):
        """Pass a user lexicon to the named-entity recognizer.

        Args:
            lexicon_path: Path of the user dictionary file.
        """
        # NOTE(review): could not confirm that NamedEntityRecognizer exposes
        # load_with_lexicon, nor its signature; call left as originally
        # written -- verify before relying on it.
        self.recognizer.load_with_lexicon(lexicon_path)

    def add_par_userdict(self, lexicon_path):
        """Pass a user lexicon to the dependency parser.

        Args:
            lexicon_path: Path of the user dictionary file.
        """
        # NOTE(review): could not confirm that Parser exposes
        # load_with_lexicon, nor its signature; call left as originally
        # written -- verify before relying on it.
        self.parser.load_with_lexicon(lexicon_path)

    # ---- analysis ----------------------------------------------------------

    def sentsplit(self, text):
        """Split ``text`` into sentences and return them as a list."""
        # NOTE(review): pyltp spells this class ``SentenceSplitter``; confirm
        # that the name ``SentenceSpliter`` is really bound by this file's
        # imports, otherwise this raises NameError.
        return list(SentenceSpliter.split(text))

    def cws(self, text):
        """Segment ``text`` and return the words as a list."""
        return list(self.segmentor.segment(text))

    def pos(self, words):
        """Return the list of POS tags produced for ``words``."""
        return list(self.postagger.postag(words))

    def ner(self, words, postags):
        """Return the list of named-entity tags for ``words``/``postags``."""
        return list(self.recognizer.recognize(words, postags))

    def par(self, words, postags):
        """Run dependency parsing and tabulate the resulting arcs.

        Args:
            words: Indexable sequence of segmented words.
            postags: Indexable sequence of POS tags, parallel to ``words``.

        Returns:
            A tuple ``(pr_list, df_list, arcs)``:

            * ``pr_list`` -- one row per token:
              ``[word, word_pos, head_word, head_pos, relation]``.
            * ``df_list`` -- the same data column-wise, as five lists in the
              order word, word_pos, head_word, head_pos, relation.
            * ``arcs`` -- the raw object returned by the parser.

            For the token attached to the virtual root (``arc.head == 0``)
            both ``head_word`` and ``head_pos`` are :attr:`ROOT_LABEL`.
        """
        arcs = self.parser.parse(words, postags)
        pr_list = []
        for i, arc in enumerate(arcs):
            if arc.head > 0:
                # Heads are 1-based; shift to index into the 0-based inputs.
                source = words[arc.head - 1]
                source_pos = postags[arc.head - 1]
            else:
                # head == 0 is the virtual root. Indexing with head - 1 would
                # wrap to -1 and wrongly report the last word as the head.
                source = source_pos = self.ROOT_LABEL
            pr_list.append(
                [words[i], postags[i], source, source_pos, arc.relation])
        # Column-wise view; always five lists, even when there are no rows.
        df_list = [[row[col] for row in pr_list] for col in range(5)]
        return pr_list, df_list, arcs