Пример #1
0
def test_name():
    """Check normalization of a first/last name pair bound by one relation.

    Both tokens are interpreted with ``inflected()`` and matched against the
    same ``gnc_relation`` instance. The assertions pin the resulting facts
    for two inputs and the absence of a match for a third.
    """
    Name = fact('Name', ['first', 'last'])
    agreement = gnc_relation()

    first_name = gram('Name').interpretation(Name.first.inflected())
    first_name = first_name.match(agreement)

    surname = gram('Surn').interpretation(Name.last.inflected())
    surname = surname.match(agreement)

    full_name = rule(first_name, surname).interpretation(Name)
    parser = Parser(full_name)

    # (input text, expected first, expected last)
    accepted = [
        ('саше иванову', 'саша', 'иванов'),
        ('сашу иванову', 'саша', 'иванова'),
    ]
    for text, first, last in accepted:
        found = parser.match(text)
        assert found.fact == Name(first=first, last=last)

    # This pair must be rejected -- presumably because the two word forms
    # cannot be reconciled under the shared relation.
    rejected = parser.match('сашу ивановой')
    assert not rejected
Пример #2
0
def test_person():
    """Parse "<position> <first> <last>" into a nested Person/Name fact.

    The position comes from ``morph_pipeline``; the position, first-name
    and last-name tokens are all matched against one ``gnc_relation``.
    """
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    # Surname / given-name predicates, each excluding the 'Abbr' grammeme.
    surname = and_(gram('Surn'), not_(gram('Abbr')))
    given_name = and_(gram('Name'), not_(gram('Abbr')))

    position = morph_pipeline(['управляющий директор', 'вице-мэр'])

    agreement = gnc_relation()
    first_part = given_name.interpretation(Name.first).match(agreement)
    last_part = surname.interpretation(Name.last).match(agreement)
    full_name = rule(first_part, last_part).interpretation(Name)

    position_part = position.interpretation(Person.position)
    position_part = position_part.match(agreement)
    name_part = full_name.interpretation(Person.name)
    person = rule(position_part, name_part).interpretation(Person)

    parser = Parser(person)

    found = parser.match('управляющий директор Иван Ульянов')
    assert found

    actual = found.fact
    expected = Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
    assert actual == expected
Пример #3
0
def test_bnf():
    """Pin the BNF rendering of rules with interpretations and relations.

    Each rule is built right before its ``assert_bnf`` call; the expected
    productions are passed as separate string arguments.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    relation = gnc_relation()

    # The expected production is labelled with the fact, not with 'A'.
    named_fact = rule('a').named('A').interpretation(F)
    assert_bnf(named_fact, "F -> 'a'")

    # Attribute interpretation nested inside a fact interpretation.
    nested = rule('a').interpretation(F.a).interpretation(F)
    assert_bnf(nested, 'F -> F.a', "F.a -> 'a'")

    # A relation shows up as a '^gnc' suffix on the production name.
    related = rule('a').match(relation).interpretation(F.a)
    assert_bnf(related, "F.a^gnc -> 'a'")

    # Repetition applied after the interpretation.
    repeated_outer = rule('a').interpretation(F.a).repeatable()
    assert_bnf(repeated_outer, 'R0 -> F.a | F.a R0', "F.a -> 'a'")

    # Repetition applied before the interpretation.
    repeated_inner = rule('a').repeatable().interpretation(F.a)
    assert_bnf(repeated_inner, 'F.a -> R1', "R1 -> 'a' | 'a' R1")
Пример #4
0
def test_bnf():
    """Pin the BNF rendering of interpreted, related, repeated and named rules.

    Each rule is built right before its ``assert_bnf`` call; the expected
    productions are passed as separate string arguments.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    relation = gnc_relation()

    # The expected production is labelled with the fact, not with 'A'.
    named_fact = rule('a').named('A').interpretation(F)
    assert_bnf(named_fact, "F -> 'a'")

    # Attribute interpretation nested inside a fact interpretation.
    nested = rule('a').interpretation(F.a).interpretation(F)
    assert_bnf(nested, 'F -> F.a', "F.a -> 'a'")

    # A relation shows up as a '^gnc' suffix on the production name.
    related = rule('a').match(relation).interpretation(F.a)
    assert_bnf(related, "F.a^gnc -> 'a'")

    # Repetition applied after the interpretation.
    repeated_outer = rule('a').interpretation(F.a).repeatable()
    assert_bnf(repeated_outer, 'R0 -> F.a R0 | F.a', "F.a -> 'a'")

    # Repetition applied before the interpretation.
    repeated_inner = rule('a').repeatable().interpretation(F.a)
    assert_bnf(repeated_inner, 'F.a -> R1', "R1 -> 'a' R1 | 'a'")

    # One underlying rule given two different names and used twice: both
    # named productions are expected to point at the same anonymous rule.
    shared = rule('a')
    pair = rule(shared.named('B'), shared.named('C')).named('D')
    assert_bnf(pair, 'D -> B C', 'B -> R0', 'C -> R0', "R0 -> 'a'")
Пример #5
0
def test_person():
    """Parse "<position> <first> <last>" using a custom morph pipeline.

    The position is recognized through a ``MorphPipeline`` subclass that
    tags its keys with the 'Position' grammeme. Exactly one match is
    expected for the sample text.
    """
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    # Surname / given-name predicates, each excluding the 'Abbr' grammeme.
    surname = and_(gram('Surn'), not_(gram('Abbr')))
    given_name = and_(gram('Name'), not_(gram('Abbr')))

    class PositionPipeline(MorphPipeline):
        # Grammeme attached to tokens produced by this pipeline.
        grammemes = {'Position'}
        # Phrases the pipeline recognizes.
        keys = ['управляющий директор', 'вице-мэр']

    position = gram('Position')
    agreement = gnc_relation()

    first_part = given_name.match(agreement).interpretation(Name.first)
    last_part = surname.match(agreement).interpretation(Name.last)
    full_name = rule(first_part, last_part).interpretation(Name)

    person = rule(
        position.interpretation(Person.position),
        full_name.interpretation(Person.name),
    ).interpretation(Person)

    parser = Parser(person, pipelines=[PositionPipeline()])

    matches = list(parser.match('управляющий директор Иван Ульянов'))
    assert len(matches) == 1

    actual = matches[0].fact
    expected = Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
    assert actual == expected
Пример #6
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, not_, fact)

from yargy.predicates import (caseless, normalized, eq, length_eq, gram,
                              dictionary, is_single, is_title)

from yargy.relations import gnc_relation

# Fact produced by the location rules; it has a single `name` attribute.
Location = fact('Location', ['name'])

# Relation shared by both tokens of REGION.
gnc = gnc_relation()

# Nouns accepted as the second token of REGION.
_REGION_KINDS = {
    'край',
    'район',
    'область',
    'губерния',
    'уезд',
}

# An 'ADJF' token followed by one of the nouns above, both matched against
# `gnc`; the result is interpreted as an inflected Location.name.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary(_REGION_KINDS).match(gnc),
).interpretation(Location.name.inflected())

# Two more independent relation instances -- presumably for rules defined
# further down that need two separate relations at once.
gnc1 = gnc_relation()
gnc2 = gnc_relation()
Пример #7
0
        tagger = CrfTagger(NAME_MODEL, get_name_features)
        super(nameExtractor, self).__init__(NAME, tagger=tagger)


###

# --- Dictionaries ---------------------------------------------------------


def _dictionary_from_file(filename):
    """Build a ``dictionary`` predicate from the entries of *filename*.

    The entries come from ``load_dict`` (defined elsewhere) and are
    de-duplicated with ``set`` before being handed to ``dictionary``.
    """
    return dictionary(set(load_dict(filename)))


IN_FIRST = _dictionary_from_file('first.txt')
IN_MAYBE_FIRST = _dictionary_from_file('maybe_first.txt')
IN_LAST = _dictionary_from_file('last.txt')

# Fact collecting the parts of a person's name.
Name = fact('Name', ['first', 'middle', 'last', 'nick'])

# --- Marked in the original as unmodified Natasha definitions -------------
gnc = gnc_relation()

# --- FIRST ----------------------------------------------------------------

TITLE = is_capitalized()

NOUN = gram('NOUN')
# Tokens carrying the tag 'I' -- presumably assigned by the CRF tagger.
NAME_CRF = tag('I')

ABBR = gram('Abbr')
SURN = gram('Surn')
# A 'Name' grammeme token that is not also an abbreviation.
NAME = and_(gram('Name'), not_(ABBR))
Пример #8
0
from yargy.predicates import (
    caseless, normalized,
    eq, length_eq,
    gram, dictionary,
    is_single, is_title
)
from yargy.relations import gnc_relation


# Fact produced by the location rules; it has a single `name` attribute.
Location = fact('Location', ['name'])


# Relation shared by both tokens of REGION.
gnc = gnc_relation()

# Nouns accepted as the second token of REGION.
_REGION_KINDS = {
    'край',
    'район',
    'область',
    'губерния',
    'уезд',
}

# An 'ADJF' token followed by one of the nouns above, both matched against
# `gnc`; the result is interpreted as an inflected Location.name.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary(_REGION_KINDS).match(gnc),
).interpretation(Location.name.inflected())

# Rebind `gnc` to a fresh relation instance -- presumably so the next rule
# does not share a relation with REGION.
gnc = gnc_relation()

FEDERAL_DISTRICT = rule(