def __init__(self, korean_corpus_worker, korean_corpus_splitter, token_min, korean_stop_words, **kwargs):
    """Set up the Korean tagger and store the tokenisation settings.

    Args:
        korean_corpus_worker: Stored as ``self.korean_corpus_tags``.
        korean_corpus_splitter: Stored as ``self.delimiters``.
        token_min: Stored as ``self.min_token_length``.
        korean_stop_words: Stored as ``self.keyword_stop_lists``.
        **kwargs: Accepted for compatibility; not read by this method.

    Raises:
        KoreanCorpusException: If the Okt tagger cannot be constructed. The
            underlying error is attached as ``__cause__``.
    """
    # The Okt ("Twitter") tagger is the active backend. corpus.Mecab() was
    # the alternative previously sketched here.
    try:
        self.tagger = corpus.Okt()
    except Exception as err:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not re-labelled as a corpus loading failure.
        say.error('Could not load korean corpus')
        raise KoreanCorpusException(
            TextRankException('Could not load korean corpus')) from err

    self.korean_corpus_tags = korean_corpus_worker
    self.delimiters = korean_corpus_splitter
    self.min_token_length = token_min
    self.keyword_stop_lists = korean_stop_words
    self.sentence_splitter = self.__sentences_splitter()

    # POS tagging always runs with stemming disabled.
    self.pos = lambda sentence: self.tagger.pos(sentence, stem=False)
def tagger_load(self):
    """Instantiate the tagger named by ``self.tagger_name`` into ``self.tagger``.

    'Okt', 'Kkma' and 'Komoran' select the class of the same name on ``tag``;
    any other value falls back to ``tag.Hannanum``.
    """
    requested = self.tagger_name
    # First recognised name wins; everything else maps to Hannanum.
    class_name = next(
        (known for known in ('Okt', 'Kkma', 'Komoran') if requested == known),
        'Hannanum',
    )
    self.tagger = getattr(tag, class_name)()
    print("load tagger")
def __init__(self, pre_trained=True, analyzer='Hannanum'):
    """Store the configuration and build the analyzer and extractors.

    Args:
        pre_trained: Stored as ``self.pre_trained``. It also decides whether
            an unrecognised ``analyzer`` name triggers a printed hint.
        analyzer: One of 'Hannanum', 'Kkma', 'Komoran', 'Mecab' or 'Okt'.
            For any other value ``self.analyzer`` is left unset.
    """
    self.pre_trained = pre_trained

    for class_name in ('Hannanum', 'Kkma', 'Komoran', 'Mecab', 'Okt'):
        if analyzer == class_name:
            self.analyzer = getattr(tag, class_name)()
            break
    else:
        # Unknown analyzer name: stay silent only when pre_trained compares
        # equal to False. Otherwise print which names are accepted.
        if not (pre_trained == False):
            print('Enter a valid KoNLPy analyzer name.\n\tavailable: Hannanum, Kkma, Komoran, Mecab, Okt')

    self.WordExtractor = WordExtractor(min_frequency=0)
    self.noun_extractor = LRNounExtractor(verbose=False)
    self.word_score = {}
def __init__(self):
    """Create an Okt tagger in ``self._mdl`` and log the konlpy version to stderr."""
    # konlpy is imported here, inside the method, not at module level.
    import konlpy
    from konlpy import tag as konlpy_tag

    banner = "Initialize Okt from konlpy ({})".format(konlpy.__version__)
    print(banner, file=sys.stderr)
    self._mdl = konlpy_tag.Okt()
def decompose(titles):
    """Print the Okt morpheme split of every title in ``titles``.

    Args:
        titles: Iterable of title strings.
    """
    analyzer = tag.Okt()
    # map() is lazy, so each title is analysed just before it is printed.
    for morphemes in map(analyzer.morphs, titles):
        print(morphemes)
def test(titles):
    """Print the Okt part-of-speech tagging of the first 20 titles.

    Args:
        titles: Sliceable sequence of title strings; only ``titles[:20]``
            is processed.
    """
    sample = titles[:20]
    analyzer = tag.Okt()
    # map() is lazy, so each title is tagged just before it is printed.
    for tagged in map(analyzer.pos, sample):
        print(tagged)