def normalisation(new_data):
    """Lemmatize every text in *new_data* with Mystem.

    Parameters
    ----------
    new_data : iterable of str
        Raw texts to normalise.

    Returns
    -------
    list of str
        One lemmatized string per input text, with all newline
        characters removed.
    """
    m = Mystem()
    # Pre-launch the mystem binary so the first lemmatize() call is not slow.
    m.start()
    normalize_data = []
    for text in tqdm(new_data, desc="normalisation"):
        lemmas = m.lemmatize(text)
        # Mystem emits a trailing "\n" token; join the lemmas and strip newlines.
        normalize_data.append("".join(lemmas).replace("\n", ""))
    return normalize_data
# NOTE(review): this snippet is collapsed onto one line and truncated mid-expression
# (it ends with a dangling `+` inside the Namespace(...) call for `model=`).
# It also references `Namespace` and `os` without visible imports — presumably
# `from argparse import Namespace` and `import os` exist in the missing part; verify.
# Left byte-identical because the remainder of the statement is not visible here.
import torch from pymystem3 import Mystem from itertools import count import onmt.io import onmt.translate import onmt import onmt.ModelConstructor import onmt.modules import sys from onmt.io.IO import build_dataset_request m = Mystem() m.start() opt = Namespace( alpha=0.0, attn_debug=False, batch_size=1, beam_size=10, beta=-0.0, data_type='text', dump_beam='', dynamic_dict=False, gpu=-1, max_length=100, max_sent_length=None, min_length=0, model=os.path.dirname(os.path.abspath(__file__)) +
# NOTE(review): collapsed one-line snippet, truncated mid-function — the body of
# `processFileFantasy` stops at `for word in words:` with no loop body visible.
# Sets up Russian stopwords (stop_words), a Mystem lemmatizer, a punctuation
# character class, and a hand-picked list of high-frequency words to exclude,
# presumably for topic modelling with gensim — confirm against the missing part.
# Left byte-identical because the function body is not visible here.
import re, os import gensim from gensim import corpora, models import nltk from nltk import FreqDist from nltk.collocations import * from pymystem3 import Mystem from stop_words import get_stop_words ru_stop = get_stop_words('ru') mystem_object = Mystem() mystem_object.start() puncts = "[«–»—!\$%&'()*+,./:;<=>?@^_`{|}~']*-–—...]" extra_words = [ "понимать", "знать", "хотеть", "глаз", "рука", "голова", "увидеть", "что-то", "смотреть", "нога", "свой", 'видеть', 'становиться', 'остаться', 'давать', 'стоять', 'оставаться', 'оказываться', 'думать' ] #Fantasy def processFileFantasy(file): doc = [] with open(file, 'r', encoding='utf-8') as f: #print(file) text = f.read() #print(len(text)) words = text.split() for word in words:
def make_lemmantisation(text):
    """Run Mystem over *text* (coerced to str) and return the lemma list."""
    analyzer = Mystem()
    analyzer.start()
    return analyzer.lemmatize(str(text))
# NOTE(review): collapsed one-line snippet, truncated mid-function — `parse_doc`
# is cut off after `analysis = word_parse.get('analysis')`, so this span cannot
# be safely rewritten. Two issues to fix once the full source is available:
#   1. `line.strip().decode('utf8')` in build_re is Python-2-only; in Python 3,
#      str has no .decode — drop the call (files are already text).
#   2. `flags=re.U+re.M+re.S` works only because the flag values are distinct
#      bits; the conventional, safe form is `re.U | re.M | re.S`.
# Left byte-identical below.
parser = argparse.ArgumentParser(description='RE to CONLL') parser.add_argument('--re', type=str, help='REs to apply') parser.add_argument('--data_dir', type=str, help='Folder with docs') parser.add_argument('--file', type=str, help='Source file') parser.add_argument('--lines', action='store_true', help='Lines as docs') parser.add_argument('--bioes', action='store_true', help='Output BEOES encoding') args = parser.parse_args() def build_re(): patterns = filter(lambda line : line and not line.startswith('#') and not line.isspace(), open(args.re).readlines()) return map(lambda line: re.compile(line.strip().decode('utf8'), flags=re.U+re.M+re.S), patterns) from pymystem3 import Mystem mystem = Mystem(grammar_info=False, disambiguation=False) mystem.start() def parse_doc(mystem, text): morph_parse = mystem.analyze(text) current_pos = 0 offsets = [] lemmas = [] words = [] all_words = [] for word_parse in morph_parse: word = word_parse['text'] all_words.append(word) sword = word.strip(' ').replace('\n', u'\u2028') if re.search("\w", sword, flags=re.U): words.append(sword) analysis = word_parse.get('analysis')