def main():
    """Check accentuation coverage of the first 100 VoxForge-ru prompts.

    Expects the prompt directory as the first CLI argument.  Reports every
    phrase that cannot be unambiguously accented and every multi-syllable
    word the accentor left without a stress mark, then prints the number
    of ambiguous phrases.
    """
    if len(sys.argv) > 1:
        init_dir_name = os.path.normpath(sys.argv[1])
        assert os.path.isdir(init_dir_name), \
            'Directory `{0}` does not exist!'.format(init_dir_name)
        # sorted() accepts any iterable; no need for an intermediate list().
        all_prompts = sorted(get_all_prompts(init_dir_name))
        accentor = Accentor()
        morpho_predictor = RNNMorphPredictor()
        ambiguous_counter = 0
        for cur_prompt in all_prompts[:100]:
            trouble = False
            unknown_words = []
            for cur_subsentence in select_subsentences(cur_prompt):
                morphotags = [
                    '{0} {1}'.format(cur_morpho.pos, cur_morpho.tag)
                    for cur_morpho in morpho_predictor.predict_sentence_tags(cur_subsentence)
                ]
                accent_variants = accentor.do_accents(cur_subsentence, morphotags)
                if len(accent_variants) > 1:
                    trouble = True
                else:
                    accented_phrase = accent_variants[0]
                    for cur_word in accented_phrase:
                        # Count vowels: single-syllable words need no explicit
                        # stress mark, so only flag words with 2+ vowels.
                        vowels_counter = sum(
                            1 for cur_char in cur_word.lower()
                            if cur_char in VOWEL_LETTERS)
                        if '+' not in cur_word and vowels_counter > 1:
                            unknown_words.append(cur_word)
            if trouble:
                print('`{0}`: this phrase cannot be unambiguously accented!'.format(cur_prompt))
                ambiguous_counter += 1
            if unknown_words:
                # sorted() makes the report deterministic (set order is not).
                for unknown_word in sorted(set(unknown_words)):
                    # Fixed message: the original said "in this this phrase".
                    print('`{0}`: word `{1}` in this phrase is unknown!'.format(
                        cur_prompt, unknown_word))
        print(ambiguous_counter)
    else:
        print("Usage: input_directory_with_voxforge_ru")
def __init__(self, batch_size=1):
    """Load the Russian RNNMorph model and remember the batch size.

    Args:
        batch_size (int, optional): Number of sentences handed to the
            predictor per prediction batch. Defaults to 1.
    """
    self.predictor = RNNMorphPredictor(language="ru")
    self.batch_size = batch_size
def __init__(self):
    """Create a private TF graph/session and load the morphology models."""
    self._graph = tf.Graph()
    self._session = tf.Session(graph=self._graph)
    # Build the RNNMorph model inside its own graph so it cannot clash
    # with any other TensorFlow model living in the same process.
    with self._session.as_default():
        with self._graph.as_default():
            self.rnnmorph = RNNMorphPredictor(language="ru")
    self.pymorphy_analyzer = pymorphy2.MorphAnalyzer()
    self.latin = re.compile("^[0-9]*[A-Za-z]+[0-9]*$")
    # Fixed character class: the original "[А-Яа-яЁе]" listed a redundant
    # "е" and omitted lowercase "ё" (U+0451 lies outside the а-я range).
    self.cyrillic = re.compile("[А-Яа-яЁё]+")
def prepare_text(text):
    """Tokenize Russian *text* and return one "lemma_POS" string per word."""
    tokens = nltk.word_tokenize(text, language="russian")
    words = [token for token in tokens if token not in punctuation]
    predictor = RNNMorphPredictor(language="ru")
    return ["{}_{}".format(form.normal_form, form.pos)
            for form in predictor.predict(words)]
class MorphPredictor(PreProcesser):
    """Annotate sentence tokens with lemma, POS and features via RNNMorph."""

    def __init__(self):
        self.rnnmorph = RNNMorphPredictor(language='ru')

    def translit(self, form):
        """Return (was_latin, form), transliterating Latin tokens to Cyrillic."""
        if re.match(r'[a-zA-Z]+', form):
            return True, translit(form, 'ru')
        return False, form

    def transform_sent(self, sent):
        """Return a copy of *sent* with morphology attached to every token."""
        sent = sent.copy()
        pairs = [self.translit(token.form) for token in sent.tokens]
        latin_flags, forms = zip(*pairs)
        predictions = self.rnnmorph.predict(forms)
        for token, prediction, was_latin in zip(sent.tokens, predictions, latin_flags):
            # Transliterated tokens keep their original surface form as lemma.
            token.lemma = token.form.lower() if was_latin else prediction.normal_form
            token.upos = prediction.pos
            token.feats = prediction.tag
        return sent

    def transform_item(self, x):
        return [self.transform_sent(sent) for sent in x]
def pos_tag(self):
    """POS-tag every sentence of every review in-place with RNNMorph.

    Only Russian ("ru") and English ("en") are supported; other languages
    are silently skipped.  Each word is replaced by a ``PosTaggedWord``
    carrying the predicted POS, grammatical tag and feature vector.
    """
    if self.language == "ru" or self.language == "en":
        # Force CPU inference, but remember the caller's device setting:
        # the original unconditionally reset CUDA_VISIBLE_DEVICES to '0'
        # afterwards (and not at all if prediction raised).
        saved_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
        try:
            predictor = RNNMorphPredictor(language=self.language)
            sentences = []
            for review in self.reviews:
                for i, sentence in enumerate(review.sentences):
                    words = [word.text for word in sentence]
                    sentences.append(words)
            sentences_forms = predictor.predict_sentences(sentences, 32, False)
            # Walk the reviews again in the same order to map the flat
            # prediction list back onto each review's sentences.
            offset = 0
            for review in self.reviews:
                for i, sentence in enumerate(review.sentences):
                    forms = sentences_forms[offset + i]
                    for word_idx, form in enumerate(forms):
                        sentence[word_idx] = PosTaggedWord(
                            sentence[word_idx], form.pos, form.tag,
                            [int(j) for j in form.vector])
                offset += len(review.sentences)
        finally:
            # Restore the previous visibility; fall back to '0' (the
            # original hard-coded value) if it was unset before.
            os.environ['CUDA_VISIBLE_DEVICES'] = \
                '0' if saved_devices is None else saved_devices
def handle_new_messages(self):
    """Analyse every not-yet-handled message in all Chat* collections."""
    predictor = RNNMorphPredictor()
    for collection_name in self.db.collection_names():
        if not collection_name.startswith('Chat'):
            continue
        print('Обрабатываем сообщения чата', collection_name)
        collection = self.db[collection_name]
        # Messages without a 'handled' flag have not been processed yet.
        unhandled = collection.find({'handled': {"$exists": False}})
        for message in unhandled:
            handled_data = self.handle_message(message, predictor)
            collection.update_one({'_id': message['_id']},
                                  {'$set': handled_data})
def handle_new_messages(self):
    """Store emoticons and morphologically analysed words of new messages.

    Every unhandled row of ``messages`` is scanned for emoji (saved to the
    ``emoticons`` table) and Russian words (analysed with RNNMorph and
    saved to ``words``), then marked as handled.

    Returns:
        tuple: (messages processed, words stored, emoticons stored).
    """
    predictor = RNNMorphPredictor()
    self.mycursor.execute(
        "SELECT id, text FROM messages WHERE handled IS NULL")
    records = self.mycursor.fetchall()
    emoticons_count = 0
    words_count = 0
    for msg_id, text in records:
        if text:
            # \X matches extended grapheme clusters, keeping
            # multi-codepoint emoji in one piece.
            for grapheme in regex.findall(r'\X', text):
                if any(char in emoji.UNICODE_EMOJI for char in grapheme):
                    self.mycursor.execute(
                        "INSERT INTO emoticons (message_id, emoticon) VALUES (%s, %s);",
                        (msg_id, grapheme))
                    emoticons_count += 1
            normalized = re.sub(r'[ёЁ]', 'е', text)
            sentences = []
            for chunk in re.split(r'[.!?]+', normalized):
                words = re.findall(
                    r'[а-яА-ЯёЁ]+-[а-яА-ЯёЁ]+|[а-яА-ЯёЁ]+', chunk)
                if words:
                    sentences.append(words)
            if sentences:
                for parsed_sentence in predictor.predict_sentences(sentences=sentences):
                    for parsed_word in parsed_sentence:
                        self.mycursor.execute(
                            "INSERT INTO words (message_id, word, normal_form, pos, tag) "
                            "VALUES (%s, %s, %s, %s, %s);",
                            (msg_id, parsed_word.word, parsed_word.normal_form,
                             parsed_word.pos, parsed_word.tag))
                        words_count += 1
        self.mycursor.execute(
            "UPDATE messages SET handled = %s WHERE id = %s",
            (True, msg_id))
    self.mydb.commit()
    return len(records), words_count, emoticons_count
def main():
    """Load the global morphological predictor and serve HTTP forever."""
    global PREDICTOR
    try:
        log('Loading predictor')
        PREDICTOR = RNNMorphPredictor(language="ru")
    except Exception as error:
        # Without the predictor the service is useless — bail out early.
        log('Can not load analyzer: "%s"', error)
        return
    server = HTTPServer((HOST, PORT), HTTPHandler)
    try:
        log('Listening http://%s:%d', HOST, PORT)
        server.serve_forever()
    except KeyboardInterrupt:
        log('Quiting')
    finally:
        server.server_close()
def __process_line(line: str, output_file: TextIO,
                   sentence_splitter: SentenceSplitter,
                   morph_predictor: RNNMorphPredictor):
    """Write a TSV morphology row per non-punctuation token of *line*.

    Each sentence is followed by a blank line; punctuation forms are
    skipped entirely.
    """
    for sentence in sentence_splitter.split(line):
        tokens = Tokenizer.tokenize(sentence)
        words = [token.text for token in tokens
                 if token.text != '' and token.token_type != Token.TokenType.SPACE]
        if not words:
            continue
        for form in morph_predictor.predict_sentence_tags(words):
            if form.pos == "PUNCT":
                continue
            row = "%s\t%s\t%s\t%s\n" % (form.word, form.normal_form,
                                        form.pos, form.tag)
            output_file.write(row)
        output_file.write("\n")
def find_rhyme(src: str, russian_lexemes: dict, rnn_morph: RNNMorphPredictor,
               phonetic_dict: Dict[str, tuple]) -> List[str]:
    """Generate rhyming variants of *src* from its purely-Russian words."""
    russian_letters = set('АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя')
    lowered = (token.strip().lower() for token in word_tokenize(src))
    src_words = [token for token in lowered if set(token) <= russian_letters]
    if not src_words:
        # Nothing usable to rhyme — return the phrase unchanged.
        return [src]
    morphotags = [get_morphodata(form.pos + ' ' + form.tag)
                  for form in rnn_morph.predict(src_words)]
    print('morphotags', morphotags)
    syllables_of_words = [str(calc_number_of_syllables(word))
                          for word in src_words]
    print('syllables_of_words', syllables_of_words)
    variants = []
    for variant in select_new_variant(src_words, morphotags, syllables_of_words,
                                      russian_lexemes, phonetic_dict, 0, []):
        variants.append(' '.join(variant))
    return variants
def get_morph_markup(input_filenames: List[str], output_filename: str):
    """Write grammatical-value markup for all input files.

    :param input_filenames: input plain-text files
    :param output_filename: path of the file the markup is written to
    """
    if os.path.exists(output_filename):
        os.remove(output_filename)
    sentence_splitter = SentenceSplitter(language='ru')
    morph_predictor = RNNMorphPredictor()
    # Bug fix: the output must be opened ONCE.  The original re-opened it
    # with mode "w+" inside the loop, truncating the markup produced for
    # every previously processed input file.
    with open(output_filename, "w+", encoding="utf-8") as w:
        for filename in input_filenames:
            with open(filename, "r", encoding="utf-8") as r:
                for line in r:
                    Morph.__process_line(line, w, sentence_splitter,
                                         morph_predictor)
class Preprocessor():
    """Turn raw text into (word, "POS tag") pairs using RNNMorph."""

    def __init__(self):
        self.predictor = RNNMorphPredictor()

    def gettags(self, text):
        """Return [word, "POS tag"] pairs for an already tokenized *text*."""
        analysis = self.predictor.predict_sentence_tags(text)
        return [[form.word, form.pos + ' ' + form.tag] for form in analysis]

    def preprocessing(self, text):
        """Replace punctuation with <sil> markers, clean up, and tag words."""
        text = sub('[\.\,\?\!\(\);:]+', ' <sil>', text)
        text = sub(' [–-] |\n', ' <sil> ', text)
        text = sub('\s{2,}', ' ', text)
        text = sub('^\s|[\\\/@#~¬`£€\$%\^\&\*–_=+\'\"\|«»–-]+', '', text)
        return self.gettags(text.split(' '))
def tag(predictor: RNNMorphPredictor, untagged_filename: str, tagged_filename: str):
    """Tag a token-per-line file and write TSV rows (index, word, lemma, POS, tag).

    :param predictor: loaded RNNMorph predictor
    :param untagged_filename: input file; each line is "idx<TAB>word...",
        sentences are separated by blank lines
    :param tagged_filename: output path
    """
    sentences = []
    with open(untagged_filename, "r", encoding='utf-8') as r:
        words = []
        for line in r:
            if line != "\n":
                records = line.strip().split("\t")
                word = records[1]
                words.append(word)
            else:
                sentences.append([word.lower() for word in words])
                words = []
        # Bug fix: flush the final sentence when the file does not end with
        # a trailing blank line — the original silently dropped it.
        if words:
            sentences.append([word.lower() for word in words])
    with open(tagged_filename, "w", encoding='utf-8') as w:
        all_forms = predictor.predict_sentences_tags(sentences)
        for forms in all_forms:
            for i, form in enumerate(forms):
                line = "{}\t{}\t{}\t{}\t{}\n".format(str(i + 1), form.word,
                                                     form.normal_form,
                                                     form.pos, form.tag)
                w.write(line)
            w.write("\n")
class TaggerEnsemble:
    """Combine rupostagger output with RNNMorph case predictions.

    rupostagger supplies the base tagset; for nouns, the Case attribute is
    overridden by the one RNNMorph predicts.
    """

    def __init__(self):
        self.predictor = RNNMorphPredictor(language="ru")
        self.tagger = rupostagger.RuPosTagger()
        self.tagger.load()

    def tag(self, words):
        """Return (word, "tag|tag|...") pairs with corrected noun cases."""
        base_tokens = self.tagger.tag(words)
        rnn_tokens = self.predictor.predict(words)
        merged = []
        for base, rnn in zip(base_tokens, rnn_tokens):
            base_tags = base[1].split('|')
            if base_tags[0] == 'NOUN' and 'Case' in rnn.tag:
                rnn_attrs = dict(part.split('=')
                                 for part in rnn.tag.split('|') if '=' in part)
                # Keep everything except Case, then add RNNMorph's Case.
                kept = [tag for tag in base_tags if not tag.startswith('Case')]
                kept.append('Case=' + rnn_attrs['Case'])
                merged.append((base[0], '|'.join(kept)))
            else:
                merged.append(base)
        return merged
def __init__(self):
    """Instantiate the RNNMorph predictor (default language)."""
    self.predictor = RNNMorphPredictor()
#!/usr/bin/env python # coding: utf-8 from nltk import sent_tokenize, word_tokenize import ufal.udpipe from rnnmorph.predictor import RNNMorphPredictor from udpipe_model import Model predictor = RNNMorphPredictor(language="ru") # Download model from https://rusvectores.org/static/models/udpipe_syntagrus.model model_file = 'udpipe_syntagrus.model' model = Model(model_file) def to_conllu(wordforms): lines = [] for i in range(len(wordforms)): line = [ str(i + 1), wordforms[i].word, wordforms[i].normal_form, wordforms[i].pos, wordforms[i].tag ] lines.append('\t'.join(line + ['_'] * 5)) return '\n'.join(lines) def pipeline(sentence): tokens = word_tokenize(sentence) forms = predictor.predict(tokens) sentences = model.read(to_conllu(forms), 'conllu') for s in sentences:
def __init__(self):
    """Instantiate the Russian RNNMorph predictor."""
    self.rnnmorph = RNNMorphPredictor(language='ru')
def setUpClass(cls):
    """Enable debug logging to stdout and create one predictor for all tests."""
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    # Shared by every test in the class — the model is loaded only once.
    cls.predictor = RNNMorphPredictor()
def __init__(self):
    """Load both taggers: rupostagger and the Russian RNNMorph model."""
    self.tagger = rupostagger.RuPosTagger()
    self.tagger.load()
    self.predictor = RNNMorphPredictor(language="ru")
# In[33]:
# NLTK accuracy: share of tokens whose predicted tag equals the gold tag.
true_pred = sum(1
                for pred_sent, gold_sent in zip(nltk_result, test_sents)
                for pred_tok, gold_tok in zip(pred_sent, gold_sent)
                if pred_tok[1] == gold_tok[1])
num_pred = sum(len(s) for s in test_sents)
print(f"{true_pred / num_pred * 100:.1f}")

# In[34]:
get_ipython().run_cell_magic('capture', '', '!pip install -q rnnmorph')

# In[35]:
from rnnmorph.predictor import RNNMorphPredictor

predictor = RNNMorphPredictor(language="en")

# In[36]:
# Feed only the word of each (word, tag) pair to the predictor.
rnnmorph_result = predictor.predict_sentences(
    [[t[0] for t in s] for s in test_sents])

# In[37]:
# RNNMorph accuracy, computed the same way as for NLTK above.
true_pred = sum(1
                for pred_sent, gold_sent in zip(rnnmorph_result, test_sents)
                for pred_tok, gold_tok in zip(pred_sent, gold_sent)
                if pred_tok.pos == gold_tok[1])
num_pred = sum(len(s) for s in test_sents)
print(f"{true_pred / num_pred * 100:.1f}")

# ### Question 7:
# * What accuracy did you get with each of the two libraries? Compare their results.
class Preprocessor():
    """Convert raw texts into (word, "POS tag") pairs, keeping <sil> pauses.

    Texts are split at <sil> markers into "phonetic phrases"; the words of
    all phrases are tagged in one RNNMorph call, then the markers are
    re-inserted around every phrase in the output.
    """

    def __init__(self, batch_size=1):
        # batch_size is forwarded to RNNMorph's batched prediction call.
        self.batch_size = batch_size
        self.predictor = RNNMorphPredictor(language="ru")

    def __del__(self):
        # Drop the predictor reference explicitly when the instance dies.
        if hasattr(self, 'predictor'):
            del self.predictor

    def __copy__(self):
        # Copies share the predictor — reloading the model would be wasteful.
        cls = self.__class__
        result = cls.__new__(cls)
        result.predictor = self.predictor
        return result

    def __deepcopy__(self, memodict={}):
        # Even deep copies share the predictor (it is treated as immutable).
        cls = self.__class__
        result = cls.__new__(cls)
        result.predictor = self.predictor
        return result

    def gettags(self, texts):
        """Tag `texts` (a list of token lists) with RNNMorph.

        Returns, per input text, a list of [word, "POS tag"] pairs with
        ['<sil>', 'SIL _'] markers surrounding each phonetic phrase.

        Raises:
            ValueError: if `texts` is not a list.
        """
        if not isinstance(texts, list):
            raise ValueError('Expected `{0}`, but got `{1}`.'.format(
                type([1, 2]), type(texts)))
        if len(texts) == 0:
            return []
        all_phonetic_phrases = []
        all_phrases_for_rnnmorph = []
        for cur_text in texts:
            # Split each text into phonetic phrases at the <sil> markers.
            list_of_phonetic_phrases = [
                cur.strip() for cur in ' '.join(cur_text).split('<sil>')
            ]
            # Flatten the phrases into one word sequence for tagging.
            united_phrase_for_rnnmorph = []
            for phonetic_phrase in list_of_phonetic_phrases:
                if len(phonetic_phrase) > 0:
                    united_phrase_for_rnnmorph += phonetic_phrase.split()
            if len(united_phrase_for_rnnmorph) > 0:
                all_phrases_for_rnnmorph.append(united_phrase_for_rnnmorph)
                all_phonetic_phrases.append(list_of_phonetic_phrases)
            else:
                # Texts with no real words get an empty phrase list.
                all_phonetic_phrases.append([])
        if len(all_phrases_for_rnnmorph) > 0:
            all_forms = self.predictor.predict_sentences(
                all_phrases_for_rnnmorph, batch_size=self.batch_size)
        else:
            all_forms = []
        all_words_and_tags = []
        phrase_ind = 0  # index into all_forms; advanced only for non-empty texts
        for cur in all_phonetic_phrases:
            words_and_tags = [['<sil>', 'SIL _']]
            if len(cur) > 0:
                token_ind = 0  # position inside the flattened word sequence
                for phonetic_phrase in cur:
                    if len(phonetic_phrase) > 0:
                        n = len(phonetic_phrase.split(' '))
                        analysis = all_forms[phrase_ind][token_ind:(token_ind + n)]
                        for word in analysis:
                            word_and_tag = []
                            word_and_tag.append(word.word)
                            word_and_tag.append(word.pos + ' ' + word.tag)
                            words_and_tags.append(word_and_tag)
                        # Re-insert the pause marker after every phrase.
                        words_and_tags.append(['<sil>', 'SIL _'])
                        token_ind += n
                phrase_ind += 1
            all_words_and_tags.append(words_and_tags)
        return all_words_and_tags

    def preprocessing(self, texts):
        """Normalize each text and return its tagged word/marker list."""

        def prepare(src):
            # Replace punctuation with <sil> markers; strip special symbols.
            dst = sub('[\.\,\?\!\(\);:]+', ' <sil>', src.lower())
            dst = sub(' [–-] |\n', ' <sil> ', dst)
            dst = sub('\s{2,}', ' ', dst)
            dst = sub('^\s|(?<!\w)[\\\/@#~¬`£€\$%\^\&\*–_=+\'\"\|«»–-]+', '', dst)
            return dst.strip().split(' ')

        words_and_tags = self.gettags([prepare(cur) for cur in texts])
        return words_and_tags
class Preprocessor(): """[summary] """ def __init__(self, batch_size=1): """[summary] Args: batch_size (int, optional): [description]. Defaults to 1. """ self.batch_size = batch_size self.predictor = RNNMorphPredictor(language="ru") def __del__(self): if hasattr(self, 'predictor'): del self.predictor def __copy__(self): cls = self.__class__ result = cls.__new__(cls) result.predictor = self.predictor return result def __deepcopy__(self, memodict={}): cls = self.__class__ result = cls.__new__(cls) result.predictor = self.predictor return result def gettags(self, texts: list) -> list: """Get morpho tags for the `texts` Args: texts (list): List of lists Raises: ValueError: [description] Returns: list: list of lists -- words and motpho tags Example: PreProcess.gettags([['я купил самолёт и ракеты'], ['ух ты']]) [[['<sil>', 'SIL _'], ['я', 'PRON Case=Nom|Number=Sing|Person=1'], ['купил', 'VERB Gender=Masc|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act'], ['самолёт', 'NOUN Case=Acc|Gender=Masc|Number=Sing'], ['и', 'CONJ _'], ['ракеты', 'NOUN Case=Acc|Gender=Fem|Number=Plur'], ['<sil>', 'SIL _']], [['<sil>', 'SIL _'], ['ух', 'INTJ _'], ['ты', 'PRON Case=Nom|Number=Sing|Person=2'], ['<sil>', 'SIL _']]] """ if not isinstance(texts, list): raise ValueError( f'Expected `{type([1, 2])}`, but got `{type(texts)}`.') if len(texts) == 0: return [] all_phonetic_phrases = [] all_phrases_for_rnnmorph = [] for cur_text in texts: list_of_phonetic_phrases = [ cur.strip() for cur in ' '.join(cur_text).split('<sil>') ] united_phrase_for_rnnmorph = [] for phonetic_phrase in list_of_phonetic_phrases: if len(phonetic_phrase) > 0: united_phrase_for_rnnmorph += phonetic_phrase.split() if len(united_phrase_for_rnnmorph) > 0: all_phrases_for_rnnmorph.append(united_phrase_for_rnnmorph) all_phonetic_phrases.append(list_of_phonetic_phrases) else: all_phonetic_phrases.append([]) if len(all_phrases_for_rnnmorph) > 0: all_forms = self.predictor.predict_sentences(all_phrases_for_rnnmorph, \ 
batch_size=self.batch_size) else: all_forms = [] all_words_and_tags = [] phrase_ind = 0 for cur in all_phonetic_phrases: words_and_tags = [['<sil>', 'SIL _']] if len(cur) > 0: token_ind = 0 for phonetic_phrase in cur: if len(phonetic_phrase) > 0: n = len(phonetic_phrase.split(' ')) analysis = all_forms[phrase_ind][token_ind:(token_ind + n)] for word in analysis: word_and_tag = [] word_and_tag.append(word.word) word_and_tag.append(word.pos + ' ' + word.tag) words_and_tags.append(word_and_tag) words_and_tags.append(['<sil>', 'SIL _']) token_ind += n phrase_ind += 1 all_words_and_tags.append(words_and_tags) return all_words_and_tags def __call__(self, texts: str): """Call the instance like function. Use in pipelines, too.""" return self.preprocessing(texts)[0] def preprocessing(self, texts: str): """[summary] Args: texts (str): Text to preprocess. Returns: list: A list of processed words and tags. """ def prepare(text: str) -> str: """Replace punctuation marks with <sil> tag; remove special symbols.""" text = sub(r'[\.\,\?\!\(\);:]+', ' <sil>', text.lower()) text = sub(r' [–-] |\n', ' <sil> ', text) text = sub(r'\s{2,}', ' ', text) text = sub(r'^\s|(?<!\w)[\\\/@#~¬`£€\$%\^\&\*–_=+\'\"\|«»–-]+', '', text) return text.strip().split(' ') return self.gettags([prepare(cur) for cur in texts])
from rnnmorph.predictor import RNNMorphPredictor
from pprint import pprint

if __name__ == '__main__':
    predictor = RNNMorphPredictor(language='ru')

    # Tag a flat list of words.
    for form in predictor.predict(words=['мама', 'мыла', 'раму']):
        print('{:<15} {:<10} {}'.format(form.normal_form, form.pos, form.tag))

    # Tag a batch of sentences: the result holds one list per sentence.
    sentence_forms = predictor.predict_sentences(sentences=[['Всем', 'привет']])
    for form in sentence_forms[0]:
        print('{:<15} {:<10} {}'.format(form.normal_form, form.pos, form.tag))
    pprint(sentence_forms)
class RNNMorphWrapper:
    """Enrich token descriptions with grammeme information.

    Combines RNNMorph predictions with pymorphy2 analyses (transitivity,
    animacy, aspect, surname/patronymic detection).
    """

    def __init__(self):
        # Keep the RNNMorph model in a private graph/session so it does
        # not interfere with other TensorFlow models in the process.
        self._graph = tf.Graph()
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default():
            with self._graph.as_default():
                self.rnnmorph = RNNMorphPredictor(language="ru")
        self.pymorphy_analyzer = pymorphy2.MorphAnalyzer()
        self.latin = re.compile("^[0-9]*[A-Za-z]+[0-9]*$")
        # NOTE(review): "[А-Яа-яЁе]" lists "е" redundantly and omits
        # lowercase "ё" (outside the а-я range) — probably meant
        # "[А-Яа-яЁё]"; confirm before changing.
        self.cyrillic = re.compile("[А-Яа-яЁе]+")

    def _choose_pymorphy_form(self, word, lemma, pos):
        """Pick a pymorphy2 parse of *word* and derive extra tags.

        Returns (other, tags_to_add, changed_lemma): `other` is "фам"
        for surnames or "отч" for patronymics, `tags_to_add` maps extra
        grammeme keys to values, `changed_lemma` is the ё-normalized lemma.
        """
        hypotheses = self.pymorphy_analyzer.parse(word)
        hyp = None
        tags_to_add = {}
        other = ""
        # NOTE(review): if no hypothesis matches the lemma, `hyp` is left
        # bound to the LAST parse (there is no for-else); the `if not hyp`
        # guard below only fires when pymorphy2 returns no parses at all.
        # Confirm that falling back to the last parse is intended.
        for hyp in hypotheses:
            if hyp.normal_form == lemma:
                break
        changed_lemma = lemma.replace("ё", "е")
        if not hyp:
            return other, tags_to_add, changed_lemma
        str_tag = str(hyp.tag)
        if "Surn" in str_tag:
            other = "фам"
            changed_lemma = word.lower().replace("ё", "е")
        elif "Patr" in str_tag:
            other = "отч"
            changed_lemma = word.lower().replace(
                "ё", "е")  # for "Петрович" the lemma is unexpectedly "Пётр"
        if hyp.tag.transitivity:
            tags_to_add[TRANSITIVITY] = str(hyp.tag.transitivity)
        if hyp.tag.animacy and pos == "NOUN":
            tags_to_add[ANIMACY] = str(hyp.tag.animacy)
        if hyp.tag.aspect:
            tags_to_add[ASPECT] = str(hyp.tag.aspect)
        return other, tags_to_add, changed_lemma

    def _change_pos(self, token, analysis):
        # Latin tokens, and "PUNCT" predictions that still contain
        # Cyrillic letters, are reclassified as POS "X".
        if re.match(self.latin, analysis.word):
            token[GRAMMEM_INFO][PART_OF_SPEECH] = "X"
        elif analysis.pos == "PUNCT" and re.search(self.cyrillic,
                                                   analysis.word):
            token[GRAMMEM_INFO][PART_OF_SPEECH] = "X"
        else:
            token[GRAMMEM_INFO][PART_OF_SPEECH] = analysis.pos
        return token

    def _gram_info_processing(self, tags_to_add, analysis):
        """Merge RNNMorph "Key=Value|..." tags with *tags_to_add*.

        Returns (dict sorted by key, "key=value|..." string of the same).
        """
        gramme_info = {}
        raw_gram_data = []
        if analysis.tag != "_":
            for tag in analysis.tag.split("|"):
                gramme_info[tag.split("=")[0].lower()] = tag.split(
                    "=")[1].lower()
        # Extra tags from pymorphy2 override/extend the RNNMorph ones.
        gramme_info.update(tags_to_add)
        sorted_gramme_info = {
            key: gramme_info[key]
            for key in sorted(gramme_info.keys())
        }
        for key in sorted_gramme_info:
            raw_gram_data.append(key + "=" + sorted_gramme_info[key])
        raw_gram_info = "|".join(raw_gram_data)
        return sorted_gramme_info, raw_gram_info

    def _rnnmorph_to_token_dicti(self, token, analysis):
        """Fill one token dict with grammeme info, POS and lemma."""
        additional_info, tags_to_add, changed_lemma = self._choose_pymorphy_form(
            analysis.word, analysis.normal_form, analysis.pos)
        sorted_gramme_info, raw_gram_info = self._gram_info_processing(
            tags_to_add, analysis)
        token[GRAMMEM_INFO] = sorted_gramme_info
        token[GRAMMEM_INFO][RAW_GRAM_INFO] = raw_gram_info
        if additional_info:
            token[GRAMMEM_INFO][OTHER] = additional_info
        token = self._change_pos(token, analysis)
        token[LEMMA] = changed_lemma
        return token

    def token_desc_list_processing(self, token_desc_list):
        """Return the token description list enriched with morphology.

        :param token_desc_list: list of token dicts
        :return: list of dicts enriched with morphological information
        """
        raw_token_list = [token[TEXT] for token in token_desc_list]
        with self._session.as_default():
            with self._graph.as_default():
                analyze_result = self.rnnmorph.predict(raw_token_list)
        res = []
        for i in range(len(token_desc_list)):
            analysis = analyze_result[i]
            tokenized_element = token_desc_list[i]
            final_tokenized_element = self._rnnmorph_to_token_dicti(
                tokenized_element, analysis)
            res.append(final_tokenized_element)
        return res

    def __call__(self, token_desc_list):
        """Collect grammeme information via RNNMorph + pymorphy2.

        Takes a list of token dicts, processes them sentence by sentence
        and returns the tokens with grammatical attributes set; a
        synthetic "." sentence-end token is appended when anything was
        produced.

        :param token_desc_list: list of dicts
        :return: enriched list of dicts
        """
        final_result = []
        sentences = token_list_to_sentences(token_desc_list)
        for sentence in sentences:
            final_result.extend(self.token_desc_list_processing(sentence))
        if final_result:
            final_result.append({
                TEXT: ".",
                LEMMA: ".",
                TOKEN_TYPE: SENTENCE_ENDPOINT_TOKEN,
                TOKEN_VALUE: {
                    VALUE: "."
                },
                LIST_OF_TOKEN_TYPES_DATA: [{
                    TOKEN_TYPE: SENTENCE_ENDPOINT_TOKEN,
                    TOKEN_VALUE: {
                        VALUE: "."
                    }
                }]
            })
        return final_result
def __init__(self, batch_size=1):
    """Load the Russian RNNMorph model and remember the batch size."""
    self.predictor = RNNMorphPredictor(language="ru")
    self.batch_size = batch_size
def get_morph_predictor():
    """Return a freshly constructed Russian ``RNNMorphPredictor``."""
    return RNNMorphPredictor(language="ru")