示例#1
0
    def __init__(self, korean_corpus_worker, korean_corpus_splitter, token_min,
                 korean_stop_words, **kwargs):
        """Set up the Korean tagger and the tokenizer settings.

        Args:
            korean_corpus_worker: Stored as ``self.korean_corpus_tags``.
            korean_corpus_splitter: Stored as ``self.delimiters``.
            token_min: Stored as ``self.min_token_length``.
            korean_stop_words: Stored as ``self.keyword_stop_lists``.
            **kwargs: Accepted so existing callers keep working; this
                initializer does not currently read any of them.

        Raises:
            KoreanCorpusException: If ``corpus.Okt()`` cannot be constructed.
                The original error is attached as ``__cause__``.
        """
        try:
            # Okt (labelled "Twitter" in the original notes) is the active
            # backend. Mecab was the alternative: switching means building
            # ``corpus.Mecab()`` here and changing the keyword arguments that
            # ``self.pos`` forwards (``flatten=...`` instead of ``stem=...``).
            self.tagger = corpus.Okt()
        except Exception as err:
            # Only genuine errors are wrapped; KeyboardInterrupt and
            # SystemExit propagate untouched instead of being reported as a
            # corpus loading failure.
            say.error('Could not load korean corpus')
            raise KoreanCorpusException(
                TextRankException('Could not load korean corpus')) from err

        self.korean_corpus_tags = korean_corpus_worker
        self.delimiters = korean_corpus_splitter
        self.min_token_length = token_min
        self.keyword_stop_lists = korean_stop_words

        # Built after the settings above are assigned; keep this order in
        # case the splitter factory reads them -- TODO confirm.
        self.sentence_splitter = self.__sentences_splitter()

        # POS-tag a sentence with the loaded tagger, stemming disabled.
        self.pos = lambda sentence: self.tagger.pos(sentence, stem=False)
示例#2
0
 def tagger_load(self):
     """Instantiate the tagger selected by ``self.tagger_name``.

     'Okt', 'Kkma' and 'Komoran' pick the class of the same name from
     ``tag``; every other value falls back to ``tag.Hannanum``. The new
     instance is stored on ``self.tagger`` and a short notice is printed.
     """
     requested = self.tagger_name
     if requested in ('Okt', 'Kkma', 'Komoran'):
         class_name = requested
     else:
         class_name = 'Hannanum'
     # Resolve only the chosen class so unused backends are never touched.
     self.tagger = getattr(tag, class_name)()
     print("load tagger")
示例#3
0
    def __init__(self, pre_trained=True, analyzer='Hannanum'):
        """Store the settings and build the analyzer and extractors.

        ``analyzer`` names a class in ``tag``: Hannanum, Kkma, Komoran,
        Mecab or Okt. For any other value ``self.analyzer`` is left unset,
        and a hint listing the valid names is printed unless
        ``pre_trained == False``.
        """
        self.pre_trained = pre_trained

        supported = ('Hannanum', 'Kkma', 'Komoran', 'Mecab', 'Okt')
        if analyzer in supported:
            # Accepted names are exactly the class names exposed by ``tag``.
            self.analyzer = getattr(tag, analyzer)()
        elif not (pre_trained == False):
            # Equality with False is kept on purpose: a value such as None
            # still triggers the hint, which ``not pre_trained`` would skip.
            print('Enter a valid KoNLPy analyzer name.\n\tavailable: Hannanum, Kkma, Komoran, Mecab, Okt')

        self.WordExtractor = WordExtractor(min_frequency=0)
        self.noun_extractor = LRNounExtractor(verbose=False)
        self.word_score = {}
示例#4
0
 def __init__(self):
     """Create the Okt tagger and report the konlpy version on stderr.

     konlpy is imported inside the method; ``sys`` is expected to be
     available at module level already.
     """
     import konlpy
     from konlpy import tag

     banner = "Initialize Okt from konlpy ({})".format(konlpy.__version__)
     print(banner, file=sys.stderr)
     self._mdl = tag.Okt()
示例#5
0
def decompose(titles):
    """Print the Okt morpheme list of each title, one list per line."""
    analyzer = tag.Okt()
    # map() is lazy, so each title is analysed right before it is printed.
    for morphemes in map(analyzer.morphs, titles):
        print(morphemes)
示例#6
0
def test(titles):
    """Print the Okt POS tagging of the first 20 titles, one per line."""
    sample = titles[:20]
    pos_tagger = tag.Okt()
    # map() is lazy, so each title is tagged right before it is printed.
    for tagged in map(pos_tagger.pos, sample):
        print(tagged)