Пример #1
0
def get_parts_of_speech(word):
    """
    List the distinct parts of speech the given Russian word could be.

    Every parse returned by ``morph.parse(word)`` contributes its ``tag.POS``
    value; repeated values are dropped and the order of first occurrence is
    kept. The values are the pymorphy2 part-of-speech enums.

    >>> get_parts_of_speech('рогалик')
    ['NOUN']
    >>> get_parts_of_speech('постовой')
    ['ADJF', 'NOUN']
    >>> 'NOUN' in get_parts_of_speech('правил')
    True
    >>> 'ADJF' in get_parts_of_speech('правил')
    False

    :param word: a Russian word
    :return: list of pymorphy2 POS enums without repetitions.
    """
    # dict keys are unique and keep insertion order, so this removes
    # duplicates while preserving the order in which parses were produced.
    distinct_pos = dict.fromkeys(parse.tag.POS for parse in morph.parse(word))
    return list(distinct_pos)
Пример #2
0
def get_initial_forms(form: str, part_filter=None) -> list:
    """
    Get all possible initial (normal) forms of a given word.

    A word form may be ambiguous, so several initial forms can be returned.
    The optional ``part_filter`` prunes that ambiguity by part of speech:
    only parses whose ``tag.POS`` is ``in part_filter`` are kept.

    >>> get_initial_forms('Дядя')
    ['дядя']
    >>> get_initial_forms('самых')
    ['самый']
    >>> get_initial_forms('честных')
    ['честной', 'честный']
    >>> get_initial_forms('правил')
    ['правило', 'править']
    >>> get_initial_forms('правил', 'NOUN')
    ['правило']
    >>> get_initial_forms('правил', ['VERB'])
    ['править']

    :param form: a Russian word
    :param part_filter: something that supports the ``in`` operator: str,
        list, set etc. If it is a container, it should contain only
        part-of-speech names according to pymorphy2 enumerations. ``None``
        (the default) disables filtering.
    :return: a list of possible initial forms of the given word, as reported
        by ``normal_form`` of each parse (lowercase, per the examples above).
        There are no repetitions, and variants keep the order in which
        ``morph.parse`` yields them (documented by the original author as
        descending certainty).
    """
    seen = set()
    initial_forms = []
    for parse in morph.parse(form):
        if part_filter is not None:
            pos = parse.tag.POS
            # A parse may have no part of speech at all (POS is None).
            # `None in "NOUN"` raises TypeError for a str filter, so such a
            # parse is treated as simply not matching a string filter.
            if pos is None and isinstance(part_filter, str):
                continue
            if pos not in part_filter:
                continue
        normal = parse.normal_form
        if normal not in seen:
            seen.add(normal)
            initial_forms.append(normal)
    return initial_forms
Пример #3
0
def get_valid_noun_initial_form(word: str)->str:
    """
    Return the initial form of the first parse accepted by ``_is_valid_noun``.

    All parses from ``morph.parse(word)`` are checked with ``_is_valid_noun``
    and the ``normal_form`` of the first accepted one is returned.

    :param word: the word to analyse
    :return: the ``normal_form`` of the first accepted parse, or ``None``
        when no parse is accepted.
    """
    noun_parses = list(filter(_is_valid_noun, morph.parse(word)))
    return noun_parses[0].normal_form if noun_parses else None