Python Morfeusz.Morfeusz примеры использования

Язык программирования: Python

Пространство имен/Пакет: morfeusz2

Класс/Тип: Morfeusz

Метод/Функция: Morfeusz

Примеров на hotexamples.com: 7

Python Morfeusz.Morfeusz - 7 примеров найдено. Это лучшие примеры Python кода для morfeusz2.Morfeusz.Morfeusz, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Morfeusz(7)

analyse(7)

dict_copyright(1)

dict_id(1)

generate(1)

Пример #1

Показать файл

    def __init__(self):
        """Initialise the lemmatizer and create the class-level analyser if needed.

        The ``morfeusz2`` import is performed inside the constructor, so the
        dependency is only required when an instance is actually created.
        The analyser is stored on the class (``PolishLemmatizer._morph``) and
        is only constructed when that attribute is still ``None``.

        Raises:
            ImportError: if ``morfeusz2`` cannot be imported.
        """
        super(PolishLemmatizer, self).__init__()

        try:
            from morfeusz2 import Morfeusz as MorfeuszAnalyser
        except ImportError:
            message = 'The Polish lemmatizer requires the morfeusz2-python library'
            raise ImportError(message)

        # Nothing more to do when the class-level analyser already exists.
        if PolishLemmatizer._morph is not None:
            return

        PolishLemmatizer._morph = MorfeuszAnalyser(dict_name='polimorf')

Пример #2

Показать файл

def process_request(params):
    """Parse request parameters, run Morfeusz and build the response dict.

    Args:
        params: Raw request parameters, handed to ``MorfeuszOptionParser``.

    Returns:
        A dict that always contains ``'results'`` (a list). When the parser
        validates successfully it also contains ``'version'``,
        ``'dictionaryId'`` and ``'copyright'``.
        NOTE(review): ``validate`` receives the response dict, presumably to
        record validation errors in it -- confirm against the parser.
    """
    option_parser = MorfeuszOptionParser(params)

    # Boolean switches: (request parameter name, Morfeusz option name).
    bool_options = (
        ('expandDag', 'expand_dag'),
        ('expandTags', 'expand_tags'),
        ('expandDot', 'expand_dot'),
        ('expandUnderscore', 'expand_underscore'),
    )
    for param_name, option_name in bool_options:
        option_parser.parse_bool(param_name, option_name)

    option_parser.parse_string(
        'agglutinationRules', 'aggl', AGGLUTINATION_RULES)
    option_parser.parse_string(
        'pastTenseSegmentation', 'praet', PAST_TENSE_SEGMENTATION)
    option_parser.parse_enum(
        'tokenNumbering', 'separate_numbering',
        TokenNumbering, TokenNumbering.separate)
    option_parser.parse_enum('caseHandling', 'case_handling', CaseHandling)
    option_parser.parse_enum(
        'whitespaceHandling', 'whitespace', WhitespaceHandling)
    option_parser.parse_actions('action')

    results = []
    response = {'results': results}

    # Invalid requests are answered with whatever validate() left in response.
    if not option_parser.validate(response):
        return response

    option_parser.set_dictionary_path('MORFEUSZ_DICT_PATH')
    morfeusz = Morfeusz(**option_parser.get_opts())

    if option_parser.action == 'analyze':
        for entry in morfeusz.analyse(option_parser.text):
            if isinstance(entry, list):
                # A list entry becomes a nested list of tagged items.
                results.append([tag_items(item) for item in entry])
            else:
                results.append(tag_items(entry))
    elif option_parser.action == 'generate':
        # One sub-list of tagged forms per requested title.
        for title in option_parser.titles:
            results.append(
                [tag_items(form) for form in morfeusz.generate(title)])

    response.update({
        'version': morfeusz2.__version__,
        'dictionaryId': morfeusz.dict_id(),
        'copyright': morfeusz.dict_copyright(),
    })
    return response

Пример #3

Показать файл

from sys import argv, exit
#other imports

# Default corpus; an optional first command-line argument overrides it.
corpus_filename = 'pl.txt'
try:
    filename = argv[1]
except IndexError:
    # No argument given: keep the default corpus file.
    pass
else:
    # Only file names present in the current directory are accepted.
    if filename not in listdir('.'):
        print('File %s not found in the current directory' % filename)
        exit(-1)
    corpus_filename = filename

exclude = string.digits  # previously: unicode(string.digits)
# Analyser instance used by lemm().
morph = Morfeusz()


def lemm(line):
    """Tokenise *line* and return one normalised form per analysable token.

    The line is lower-cased and split on runs of digits, runs of non-word
    characters and single underscores; empty fragments are dropped. Each
    remaining token is passed to the module-level ``morph.analyse``. When the
    analysis is non-empty, element ``[2][1]`` of its first entry is taken and
    cut at the first ``':'``.
    NOTE(review): that element is presumably the lemma of the first
    interpretation -- confirm against the Morfeusz result format.

    Args:
        line: Text to process.

    Returns:
        A list of strings, in token order. Tokens with an empty analysis are
        skipped.
    """
    # Raw string keeps \d and \W as regex escapes rather than string escapes.
    tokens = re.split(r'\d+|\W+|_', line.lower(), flags=re.UNICODE)

    norm_sentence = []
    # Iterate tokens directly; this also avoids the Python 2-only xrange().
    for token in tokens:
        if not token:
            continue
        w_desc = morph.analyse(token)
        if w_desc:
            norm_sentence.append(w_desc[0][2][1].split(':')[0])
    return norm_sentence

Пример #4

Показать файл

#! /usr/bin/python
# *-* coding: utf-8 *-*

from morfeusz2 import Morfeusz
from concraft_pl2 import Concraft, Server

# Create the analyser and the server BEFORE entering the try block. If either
# constructor raised inside the try, the finally clause would reference an
# unbound `server` and the resulting NameError would hide the real error.
morfeusz = Morfeusz(expand_tags=True)
server = Server(model_path="/home/kuba/work/ipipan/concraft/pre-trained/Sep-18/model-04-09-2018.gz", port=3001)

try:
  concraft = Concraft(port=3001)

  # Variant 1: disambiguate the analysis result directly.
  dag = morfeusz.analyse(u'W Szczebrzeszynie chrząszcz brzmi w trzcinie.')
  res = concraft.disamb(dag)
  print(res)

  # Variant 2: convert the analysis to its string form and disambiguate that.
  dag = morfeusz.analyse(u'W Szczebrzeszynie chrząszcz brzmi w trzcinie.')
  dag_str = concraft.dag_to_str(dag)
  dag_disamb_str = concraft.disamb_str(dag_str)
  print(dag_disamb_str)
finally:
  # Always terminate the server, even if a step above failed.
  server.terminate()

Пример #5

Показать файл

import pathlib, random, re, sys
from typing import Callable, Optional

from morfeusz2 import Morfeusz
from wordnet import query

# Module-level Morfeusz instance, constructed with analyse=False.
# NOTE(review): presumably only used for generation -- confirm.
morfeusz = Morfeusz(analyse=False)

# Module-level containers. The dicts start empty here.
# NOTE(review): where and how they are filled is not visible in this excerpt.
DATASETS = ["new"]
DICT_LINES = {}
DICT_FUNCTIONS = {}

THESAURUS = {}

# Words from the thesaurus containing these tags will be ignored:
BLACKLISTED_TAGS = [
	"(bardzo potocznie)",
	"(potocznie)",
	"(częściej, ale wg niektórych niepoprawnie)",
	"(eufemistycznie)",  # :(
	#"(nieco potocznie)",  # Eh, it's fine
	"(obraźliwe)",
	"(obraźliwie)",
	#"(pieszczotliwie)",
	"(pogardliwie)",
	"(potoczne)",
	"(potocznie)",
	"(przestarzale)",
	"(ptoocznie)",
	"(regionalnie)",  # Contains some inappropriate words
	"(rzadko, wg niektórych niepoprawnie)",

Пример #6

Показать файл

 def __init__(self):
     """Create a ``Morfeusz`` object with default options and store it in ``self.morf``."""
     self.morf = Morfeusz()

Пример #7

Показать файл

    r'<([a-z]*-[a-z]*)(\s+[a-z]*="\w*((\s+\w*)+)?")?>(.+?)<\/[a-z]*-[a-z]*>',
    letter_contents)
# print('one word tags')
# for tag in tags_word:
#     print(tag)
#     print('\n')
# print('two word tags')
# for tag in tags_words:
#     print(tag)
#     print('\n')


def remove_dashes(text):
    """Return *text* with every ``'-'`` character removed.

    Args:
        text: String to clean (any iterable of one-character strings works).

    Returns:
        The characters of *text*, in order, without the dashes. An empty
        input yields an empty string.
    """
    # A single join avoids the quadratic cost of repeated concatenation.
    return ''.join(letter for letter in text if letter != '-')


# Process the contents of the first letter and join the pieces with spaces.
# NOTE(review): `remove_tags` and `letters` are defined outside this excerpt;
# presumably remove_tags returns a sequence of tag-free strings -- confirm.
letter1_no_tags = remove_tags(letters[0].contents)
letter1_nt_str = ' '.join(letter1_no_tags)
# Disabled experiment: also strip dashes from the decoded text.
# letter_ntnd_str = remove_dashes(letter1_nt_str.decode('utf-8'))
# print(letter_ntnd_str)

# Run the joined text through a Morfeusz instance built with default options.
morf = Morfeusz()
# print(morf)
# NOTE(review): calling .decode() on the joined string only works on
# Python 2; on Python 3 a str has no decode() method -- confirm the target
# interpreter. Also note that the undecoded string is what gets analysed.
print(letter1_nt_str.decode('utf8'))
letter1_analysed = morf.analyse(letter1_nt_str)
print(letter1_analysed)