示例#1
0
class Morph:
    """Uniform front end over several Korean morphological analyzers.

    The analyzer classes (``Okt``, ``Komoran``, ``Kkma``, ``Hannanum``,
    ``Mecab``, ``Twitter``) are expected to be imported elsewhere in this
    module.  For every engine except ``"Mecab"`` the work is delegated to the
    engine's own ``pos`` / ``nouns`` methods; for ``"Mecab"`` the raw text
    returned by ``self.nlp.parse`` is parsed here.

    NOTE(review): the Mecab branch assumes ``self.nlp.parse(text)`` returns
    MeCab-style text, one ``surface<TAB>feat0,feat1,...`` line per token
    followed by an ``EOS`` line -- confirm against the binding actually
    imported as ``Mecab``.
    """

    # Engines whose ``pos`` accepts ``norm``/``stem``.  Per the KoNLPy API
    # reference only Okt (and its deprecated alias Twitter) take them; passing
    # them to the other engines raises TypeError.
    _NORM_STEM_ENGINES = ("Okt", "Twitter")

    # A token counts as a noun when its POS tag starts with one of these.
    _NOUN_TAG_PREFIXES = ("NN", "NP")

    def __init__(self, nlpEngine="Mecab"):
        """Create the analyzer backed by the requested engine.

        :param nlpEngine: engine name with a capitalized first letter (str);
            one of "Okt", "Komoran", "Kkma", "Hannanum", "Mecab", "Twitter".
        :raises NameError: if ``nlpEngine`` is not one of the known names.
        """
        self.nlpEngine = nlpEngine
        # An if/elif chain (not a name->class dict) so that only the selected
        # engine's class has to be importable in this module.
        if nlpEngine == "Okt":
            self.nlp = Okt()
        elif nlpEngine == "Komoran":
            self.nlp = Komoran()
        elif nlpEngine == "Kkma":
            self.nlp = Kkma()
        elif nlpEngine == "Hannanum":
            self.nlp = Hannanum()
        elif nlpEngine == "Mecab":
            self.nlp = Mecab()
        elif nlpEngine == "Twitter":
            self.nlp = Twitter()
        else:
            raise NameError("unknown nlp name")

    def _mecab_tokens(self, text):
        """Parse raw Mecab output into ``(surface, features)`` pairs.

        Each line is split on the first tab *before* the feature part is split
        on commas, so a token whose surface form is itself a comma is parsed
        correctly.  Lines without a tab (the ``EOS`` marker, blank lines) are
        skipped.

        :param text: text to analyze.
        :return: list of ``(surface, [feat0, feat1, ...])`` tuples.
        """
        tokens = []
        for line in self.nlp.parse(text).split("\n"):
            surface, tab, feature_text = line.partition("\t")
            if not tab:
                continue
            tokens.append((surface, feature_text.split(",")))
        return tokens

    def get_morphs(self, sentence, norm=True, stem=True, join=False):
        """Return the POS-tagged morphemes of ``sentence``.

        :param sentence: text to analyze.
        :param norm: forwarded to ``pos`` for Okt/Twitter only; ignored by the
            other engines.
        :param stem: forwarded to ``pos`` for Okt/Twitter.  For Mecab, when
            true the 4th feature field replaces the surface form
            (NOTE(review): presumably the reading field of the mecab-ko
            dictionary -- verify); when false the surface form is used.
            Ignored by the remaining engines.
        :param join: if true, each item is a ``"form/tag"`` string instead of
            a ``(form, tag)`` tuple.
        :return: list of morphemes.  The Mecab branch is best-effort and
            returns ``[]`` if the analyzer output cannot be parsed.
        """
        if self.nlpEngine != "Mecab":
            if self.nlpEngine in self._NORM_STEM_ENGINES:
                return self.nlp.pos(sentence, norm=norm, stem=stem, join=join)
            return self.nlp.pos(sentence, join=join)

        try:
            pairs = [
                (features[3] if stem else surface, features[0])
                for surface, features in self._mecab_tokens(sentence)
            ]
        except Exception:
            # Deliberate best-effort: an analyzer failure or an unexpected
            # feature layout yields "no morphemes" rather than an error.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return []

        if join:
            return ["/".join(pair) for pair in pairs]
        return pairs

    def get_nouns(self, text):
        """Return the nouns found in ``text``.

        For Mecab, a token is a noun when its POS tag (first feature field)
        starts with ``NN`` or ``NP``; other engines use their own ``nouns``.

        :param text: text to analyze.
        :return: list of noun surface forms, in order of appearance.
        """
        if self.nlpEngine != "Mecab":
            return self.nlp.nouns(text)
        return [
            surface
            for surface, features in self._mecab_tokens(text)
            if features[0][:2] in self._NOUN_TAG_PREFIXES
        ]