Example #1
def test_pipeline():
    RULE = rule(pipeline(['a b c', 'b c']), 'd')
    parser = Parser(RULE)
    assert parser.match('b c d')
    assert parser.match('a b c d')

    RULE = rule(pipeline(['a b']).repeatable(), 'c')
    parser = Parser(RULE)
    assert parser.match('a b a b c')

    RULE = rule(caseless_pipeline(['A B']), 'c')
    parser = Parser(RULE)
    assert parser.match('A b c')

    RULE = morph_pipeline([
        'текст',
        'текст песни',
        'материал',
        'информационный материал',
    ])
    parser = Parser(RULE)
    matches = list(parser.findall('текстом песни музыкальной группы'))
    assert len(matches) == 1
    match = matches[0]
    assert [_.value for _ in match.tokens] == ['текстом', 'песни']

    matches = list(parser.findall('информационного материала под названием'))
    assert len(matches) == 1
    match = matches[0]
    assert [_.value for _ in match.tokens] == ['информационного', 'материала']

    RULE = morph_pipeline(['1 B.'])
    parser = Parser(RULE)
    assert parser.match('1 b .')
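
The snippets in this listing omit their imports. For reference, a minimal standalone version of the morph_pipeline example above could look like the following sketch (it assumes yargy's standard module layout; the sample sentence is made up):

# Minimal sketch, assuming yargy is installed and its standard module layout.
from yargy import Parser
from yargy.pipelines import morph_pipeline

# morph_pipeline matches multi-word keys in any morphological form.
TOPIC = morph_pipeline(['текст песни', 'информационный материал'])
parser = Parser(TOPIC)

for match in parser.findall('прислали текст песни и информационные материалы'):
    print([token.value for token in match.tokens])

pipeline and caseless_pipeline are imported from yargy.pipelines in the same way.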
Example #2
def get_rules():
    RU = type('RU')
    INT = type('INT')
    NONE = type('NONE')
    NOUN = gram('NOUN')
    ADJF = gram('ADJF')
    NAME = gram('Name')
    PREP = gram('PREP')
    #GEO=gram('Geox')
    GEO = rule(
        and_(
            gram('Geox'),
            not_(
                or_(
                    eq('артема'),
                    eq('фармана'),
                    eq('оскол'),
                    eq('мунарева'),
                ))))

    NAME_OR_NOUN = or_(NAME, NOUN)

    CITY = morph_pipeline(['город', 'Нижний', 'новгород'])

    CITY_EXEP = rule(morph_pipeline(['артем', 'фармана', 'оскол']))

    CITY_NOT = rule(not_(or_(eq('артем'), eq('фармана'), eq('оскол'), INT)))

    CITY_PITER = rule(eq('санкт'), eq('-'), eq('петербург'))

    COMPLICATED_CITY = or_(rule(CITY.optional(), GEO), CITY_PITER)

    FINAL_CITY = or_(COMPLICATED_CITY)
    return FINAL_CITY
Example #3
    def __init__(self, logger=None, env='local'):

        self.env = env

        if logger is None:
            self.logger = logging.getLogger("OGRNExtractor")
            self.logger.setLevel(logging.DEBUG)
            handler = RotatingFileHandler("ogrn_extractor.log",
                                          mode='a',
                                          encoding='utf-8',
                                          backupCount=5,
                                          maxBytes=1 * 1024 * 1024)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
        else:
            self.logger = logger

        self.tokenizer = MorphTokenizer()

        OGRN = morph_pipeline([
            'огрн', 'основной государственный регистрационный номер', 'огрнип'
        ])

        INT = type('INT')

        OGRN_NUMBER = rule(OGRN, INT)

        self.full_ogrn_parser = Parser(OGRN_NUMBER)
        self.ogrn_num_parser = Parser(rule(INT))
Example #4
def test_type_errors():
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        F.a
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact

    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        custom(str)
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact
Example #5
def test_person():
    Name = fact(
        'Name',
        ['first', 'last'],
    )
    Person = fact('Person', ['position', 'name'])

    LAST = and_(
        gram('Surn'),
        not_(gram('Abbr')),
    )
    FIRST = and_(
        gram('Name'),
        not_(gram('Abbr')),
    )

    POSITION = morph_pipeline(['управляющий директор', 'вице-мэр'])

    gnc = gnc_relation()
    NAME = rule(
        FIRST.interpretation(Name.first).match(gnc),
        LAST.interpretation(Name.last).match(gnc)).interpretation(Name)

    PERSON = rule(
        POSITION.interpretation(Person.position).match(gnc),
        NAME.interpretation(Person.name)).interpretation(Person)

    parser = Parser(PERSON)

    match = parser.match('управляющий директор Иван Ульянов')
    assert match

    assert match.fact == Person(position='управляющий директор',
                                name=Name(first='Иван', last='Ульянов'))
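
The .match(gnc) calls above link the matched tokens through a gender-number-case agreement relation. A smaller self-contained sketch of the same mechanism (the adjective/noun pair is illustrative, not taken from the snippet):

# Sketch of gnc_relation on its own (assumed imports; illustrative words).
from yargy import Parser, rule
from yargy.predicates import gram
from yargy.relations import gnc_relation

gnc = gnc_relation()
ADJ_NOUN = rule(
    gram('ADJF').match(gnc),  # the adjective must agree with ...
    gram('NOUN').match(gnc),  # ... the noun in gender, number and case
)
parser = Parser(ADJ_NOUN)
print(parser.match('красивая девочка') is not None)  # expected: True (forms agree)
print(parser.match('красивый девочка') is not None)  # expected: False (gender mismatch)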
Example #6
def test_type_errors():
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        F.a
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact

    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        custom(str)
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact
Example #7
def test_merge_facts():
    F = fact('F', ['a', 'b'])
    A = rule(eq('a').interpretation(F.a)).interpretation(F)
    B = rule(eq('b').interpretation(F.b)).interpretation(F)
    RULE = rule(A, B).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
Example #8
    def __init__(self, logger=None, env='local'):

        self.env = env

        if logger is None:
            self.logger = logging.getLogger("AdsExtractor")
            self.logger.setLevel(logging.DEBUG)
            handler = RotatingFileHandler("ads_extractor.log", mode='a', encoding='utf-8', backupCount=5,
                                     maxBytes=1 * 1024 * 1024)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
        else:
            self.logger = logger

        self.texttools = texttools.TextTools(self.logger)

        self.tokenizer = MorphTokenizer()
        self.morph = pymorphy2.MorphAnalyzer()

        EXCLUDE = morph_pipeline([
            'без',
            'не',
            'вправе отказаться',
            'может отказаться',
            'услуга'
        ])

        AGREEMENT = morph_pipeline([
            'соглашаться с получением'
        ])

        SUBJECT = morph_pipeline([
            'рассылка',
            'предложение'
        ])

        KIND = morph_pipeline([
            'рекламный'
        ])

        SPECIALS = morph_pipeline([
            'рекламныя цель'
        ])

        ADS = or_(
            rule(KIND, SUBJECT),
            rule(SUBJECT, KIND),
            or_(SPECIALS, AGREEMENT)
        )

        self.ads_parser = Parser(ADS)
        self.exclude_parser = Parser(rule(EXCLUDE))
Example #9
def get_hyperonyms(main_word):
    HYPONYM = eq(utils.deaccent(main_word))
    RULE = or_(rule(HYPONYM, ATAKJE, START, MID, END), rule(HYPONYM, MID, END),
               rule(START_S, END, KAK, HYPONYM), rule(END, INCLUDING, HYPONYM))
    parser = Parser(RULE)
    text = utils.deaccent(wikipedia.summary(main_word))
    print(text)
    text = re.sub(r'\(.+?\)', '', text)
    text = text.lower().replace('* сергии радонежскии* ', '')
    for idx, match in enumerate(parser.findall(text.lower())):
        k = [_.value for _ in match.tokens]
        print(k)
Example #10
def get_first_rules():
    RU = type('RU')
    INT = type('INT')
    NONE = type('NONE')
    NOUN = gram('NOUN')
    ADJF = gram('ADJF')
    ANIM = gram('anim')
    GENT = gram('gent')
    SGTM = gram('Sgtm')
    CONJ = gram('CONJ')
    PATR = gram('Patr')
    NAME = gram('Name')
    PREP = gram('PREP')

    STATE = or_(
        eq('моторин'),
        eq('юрок'),
        eq('вакула'),
        eq('эйхвальд'),
        eq('иммуно'),
        eq('из'),
        eq('славы'),
        eq('хайбулаев'),
        eq('михална'),
        eq('валиде'),
        eq('шиян'),
        eq('сим'),
        eq('мазитов'),
        eq('хамидов')
    )

    NAME_CONST = rule(
        and_(
            NAME,
            ANIM,
            not_(SGTM),
            not_(STATE)
        )
    )

    COMPLICATED = rule(
        NAME_CONST.repeatable()
    )

    FINAL = or_(COMPLICATED)
    return FINAL
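
NAME_CONST above relies on and_/not_ composing predicates token-wise. A small self-contained sketch of that behavior (the name and the excluded word are illustrative):

# Sketch (assumed imports): and_/not_ combine predicates over a single token.
from yargy import Parser, rule, and_, not_
from yargy.predicates import gram, eq

PERSON_NAME = rule(and_(gram('Name'), not_(eq('иван'))))
parser = Parser(PERSON_NAME)
print(parser.match('пётр') is not None)  # expected: True  (tagged as a first name)
print(parser.match('иван') is not None)  # expected: False (excluded by not_)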
Example #11
def test_repeatable_optional():
    A = rule('a')
    assert_bnf(
        A.optional().repeatable(),
        "R0 -> e | 'a' R0 | 'a'",
    )
    assert_bnf(
        A.repeatable().optional(),
        "R0 -> e | 'a' R0 | 'a'",
    )
    assert_bnf(
        A.repeatable().optional().repeatable(),
        "R0 -> e | 'a' R0 | 'a'",
    )
    assert_bnf(A.repeatable().repeatable(), "R0 -> 'a' R0 | 'a'")
    assert_bnf(
        A.optional().optional(),
        "R0 -> e | 'a'",
    )
    assert_bnf(A.repeatable(max=2).repeatable(), "R0 -> 'a' R0 | 'a'")
    assert_bnf(A.repeatable().repeatable(min=1, max=2), "R0 -> 'a' R0 | 'a'")
    assert_bnf(A.optional().repeatable(max=2), 'R0 -> e | R1',
               "R1 -> 'a' 'a' | 'a'")
    assert_bnf(A.repeatable(reverse=True).optional(), "R0 -> e | 'a' | 'a' R0")
    assert_bnf(A.repeatable().repeatable(reverse=True), "R0 -> 'a' | 'a' R0")
    assert_bnf(
        A.repeatable(reverse=True).repeatable(min=1, max=2),
        "R0 -> 'a' | 'a' R0")
    assert_bnf(A.repeatable().repeatable(min=2, reverse=True),
               "R0 -> 'a' R0 | 'a'")
    assert_bnf(A.repeatable(max=2, reverse=True), "R0 -> 'a' | 'a' 'a'")
Example #12
def test_name():
    Name = fact(
        'Name',
        ['first', 'last']
    )

    gnc = gnc_relation()

    FIRST = gram('Name').interpretation(
        Name.first.inflected()
    ).match(gnc)

    LAST = gram('Surn').interpretation(
        Name.last.inflected()
    ).match(gnc)

    NAME = rule(
        FIRST,
        LAST
    ).interpretation(Name)

    parser = Parser(NAME)
    match = parser.match('саше иванову')
    assert match.fact == Name(first='саша', last='иванов')

    match = parser.match('сашу иванову')
    assert match.fact == Name(first='саша', last='иванова')

    match = parser.match('сашу ивановой')
    assert not match
Example #13
    def __init__(self, name=None):

        self.section_rule = rule(
            dictionary({"раздел", "тема", "дисциплина", "наименование"}))

        self.lectures_rule = rule(
            morph_pipeline([
                'тема лекций', 'содержание занятий',
                'содержание лекционного занятия'
            ]))

        self.pract_rule = rule(morph_pipeline(['наименование']))

        self.srs_rule = rule(morph_pipeline(['СРС']))

        self.docxdoc = DocumentPrepare(name).open_doc()
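
The section_rule above uses the dictionary() predicate, which compares a token's normal form against the given set, so inflected forms also match. A brief sketch (assumed imports; the sample words are illustrative):

# Sketch: dictionary() matches by normal form (assumed imports).
from yargy import Parser, rule
from yargy.predicates import dictionary

SECTION = rule(dictionary({'раздел', 'тема'}))
parser = Parser(SECTION)
print(parser.match('темы') is not None)    # expected: True ('темы' normalizes to 'тема')
print(parser.match('лекция') is not None)  # expected: False (not in the set)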
Example #14
def update_rules(name):
    NAME = pipeline(name).interpretation(Socdem.name)

    SOCDEM_ELEMS = rule(or_(NAME, GENDER, date.DATE, AGE, LOCATION))

    SOCDEM = rule(
        NAME, GENDER.optional(),
        or_(
            rule(AGE.optional(),
                 date.DATE.interpretation(Socdem.date_of_birth).optional()),
            rule(
                date.DATE.interpretation(Socdem.date_of_birth).optional(),
                AGE.optional()),
        ), LOCATION.optional()).interpretation(Socdem)

    return SOCDEM_ELEMS, SOCDEM
Example #15
    def __init__(self, names: list = [], version_numbers: list = [],
                 version_names: list = [], consoles: list = []):
        rules = rule(
            morph_pipeline(names).interpretation(self.__game.name.const(names[0])),
            morph_pipeline(version_numbers).interpretation(self.__game.version_number).optional(),
            morph_pipeline(version_names).interpretation(self.__game.version_name).optional(),
            morph_pipeline(consoles).interpretation(self.__game.console).optional()
        )
        game = or_(rules).interpretation(self.__game)
        self.parser = Parser(game)
Example #16
def test_inflected_custom():
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(
        inflected({'nomn', 'sing'}).custom(MONTHS.get))
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == 1
Example #17
def _abbreviate(word: str, abbrs: List[str], opt=False):
    abbrs, dashed = partition(lambda abbr: '-' in abbr, abbrs)
    dashed = map(
        lambda a: rule(*map(caseless, intersperse('-', a.split('-')))), dashed)

    original_word = rule(normalized(word))
    dashed_sequence = rule(or_(*dashed))
    abbr_with_dot = rule(
        or_(*map(caseless, abbrs)),
        eq('.').optional(),
    )

    result = or_(original_word, dashed_sequence, abbr_with_dot) \
        .interpretation(interpretation.const(word))

    return result.optional() if opt else result
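
partition and intersperse are not defined in this snippet; they are presumably helpers along the lines of more_itertools.partition and more_itertools.intersperse (an assumption). A hypothetical usage sketch, reusing _abbreviate as defined above:

# Hypothetical usage of _abbreviate; assumes the same imports the snippet relies on
# (yargy's rule/or_/caseless/normalized/eq, yargy interpretation, Parser, and the
# partition/intersperse helpers).
OBLAST = _abbreviate('область', ['обл'])
parser = Parser(OBLAST)
match = parser.match('обл.')
print(match.fact if match else None)  # expected: 'область'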
Example #18
def get_all_collocation(lines, word):
    """
    Function for finding all collocations of word and any word after it.

    :param lines: list of string
        Lines for processing.

    :param word: str
        Word for searching.

    :return:
        List of all valid collocations.
    """
    if not isinstance(lines, list) or not isinstance(word, str):
        raise TypeError

    gr = rule(normalized(word),
              and_(not_(yargy_type('PUNCT')), not_(yargy_type('OTHER'))))

    result_list = []

    for line in lines:
        if not isinstance(line, str):
            raise TypeError
        for match in Parser(gr).findall(line):
            result_list.append(' '.join(
                [Normalizer.normalise(token.value) for token in match.tokens]))

    return result_list
Example #19
def test_merge_facts():
    F = fact('F', ['a', 'b'])
    A = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F)
    B = rule(
        eq('b').interpretation(F.b)
    ).interpretation(F)
    RULE = rule(
        A, B
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
Example #20
def test_inflected_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
Example #21
def test_normalized_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(F.a.normalized().custom(
        MONTHS.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
Example #22
def test_inflected():
    RULE = rule(
        'московским'
    ).interpretation(
        inflected({'nomn', 'femn'})
    )
    parser = Parser(RULE)
    match = parser.match('московским')
    assert match.fact == 'московская'
Example #23
def test_rule_custom():
    RULE = rule(
        '3', '.', '14'
    ).interpretation(
        custom(float)
    )
    parser = Parser(RULE)
    match = parser.match('3.14')
    assert match.fact == 3.14
Example #24
def test_normalized():
    RULE = rule(
        'московским'
    ).interpretation(
        normalized()
    )
    parser = Parser(RULE)
    match = parser.match('московским')
    assert match.fact == 'московский'
Example #25
def test_inflected():
    RULE = rule(
        'московским'
    ).interpretation(
        inflected({'nomn', 'femn'})
    )
    parser = Parser(RULE)
    match = parser.match('московским')
    assert match.fact == 'московская'
Example #26
def test_const():
    RULE = rule(
        'a'
    ).interpretation(
        const(1)
    )
    parser = Parser(RULE)
    match = parser.match('a')
    assert match.fact == 1
Example #27
def test_const():
    RULE = rule(
        'a'
    ).interpretation(
        const(1)
    )
    parser = Parser(RULE)
    match = parser.match('a')
    assert match.fact == 1
Example #28
def test_rule_custom():
    RULE = rule(
        '3', '.', '14'
    ).interpretation(
        custom(float)
    )
    parser = Parser(RULE)
    match = parser.match('3.14')
    assert match.fact == 3.14
Example #29
def test_normalized():
    RULE = rule(
        'московским'
    ).interpretation(
        normalized()
    )
    parser = Parser(RULE)
    match = parser.match('московским')
    assert match.fact == 'московский'
Example #30
def test_attribute_normalized():
    F = fact('F', 'a')
    RULE = rule('январе').interpretation(F.a.normalized()).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
Example #31
def test_bnf():
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    gnc = gnc_relation()

    assert_bnf(rule('a').named('A').interpretation(F), "F -> 'a'")
    assert_bnf(
        rule('a').interpretation(F.a).interpretation(F), 'F -> F.a',
        "F.a -> 'a'")
    assert_bnf(rule('a').match(gnc).interpretation(F.a), "F.a^gnc -> 'a'")
    assert_bnf(
        rule('a').interpretation(F.a).repeatable(), 'R0 -> F.a | F.a R0',
        "F.a -> 'a'")
    assert_bnf(
        rule('a').repeatable().interpretation(F.a), 'F.a -> R1',
        "R1 -> 'a' | 'a' R1")
Example #32
def test_rule_attribute():
    F = fact('F', ['a'])
    RULE = rule('a', 'A').interpretation(F.a).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a   A')
    record = match.fact
    assert record == F(a='a A')
    assert record.spans == [(0, 5)]
    assert record.as_json == {'a': 'a A'}
Example #33
def test_attribute_custom():
    F = fact('F', 'a')
    RULE = rule('1').interpretation(F.a.custom(int)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('1')
    record = match.fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
Example #34
def test_attribute_custom_custom():
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    RULE = rule('A').interpretation(F.a.custom(str.lower).custom(
        MAPPING.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('A')
    record = match.fact
    assert record == F(a=1)
Example #35
def test_rule_custom_custom():
    MAPPING = {'a': 1}
    RULE = rule(
        'A'
    ).interpretation(
        custom(str.lower).custom(MAPPING.get)
    )
    parser = Parser(RULE)
    match = parser.match('A')
    assert match.fact == 1
Example #36
def test_constant_attribute():
    MONEY_RULE = rule(
        gram('INT').interpretation(Money.count),
        dictionary({'тысяча'}).interpretation(Money.base.const(10**3)),
        dictionary({'рубль', 'доллар'}).interpretation(Money.currency),
    ).interpretation(Money)

    parser = Parser(MONEY_RULE)
    matches = list(parser.match('1 тысяча рублей'))
    assert matches[0].fact == Money(count=1, base=1000, currency='рублей')
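
The Money fact used above is defined elsewhere in that project. A self-contained variant could look like the sketch below: the field names are taken from the attributes used in the snippet, and the type('INT') token predicate is used here in place of gram('INT') (a deliberate substitution, not the original code):

# Self-contained sketch (assumed Money definition and imports); yargy's `type`
# predicate is aliased to avoid shadowing the builtin.
from yargy import Parser, rule
from yargy.interpretation import fact
from yargy.predicates import dictionary, type as token_type

Money = fact('Money', ['count', 'base', 'currency'])

MONEY_RULE = rule(
    token_type('INT').interpretation(Money.count.custom(int)),
    dictionary({'тысяча'}).interpretation(Money.base.const(10**3)),
    dictionary({'рубль', 'доллар'}).interpretation(Money.currency),
).interpretation(Money)

parser = Parser(MONEY_RULE)
match = parser.match('1 тысяча рублей')
print(match.fact)  # expected: Money(count=1, base=1000, currency='рублей')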
Example #37
def test_attribute():
    F = fact('F', 'a')
    RULE = rule(
        'a'
    ).interpretation(
        F.a
    )
    parser = Parser(RULE)
    match = parser.match('a')
    assert match.fact == 'a'
Example #38
    def define(self, item, *items):
        from yargy import rule

        if not items and is_rule(item):
            if isinstance(item, ForwardRule):
                raise ValueError('forward(forward(...)) not allowed')
            self.rule = item
        else:
            self.rule = rule(item, *items)
        return self
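
define() above is the second half of yargy's forward() mechanism for building recursive rules: forward() creates a placeholder, define() fills it in. A sketch of typical usage (the bracket grammar is illustrative):

# Sketch (assumed imports): a recursive rule built with forward() and define().
from yargy import Parser, rule, or_, forward
from yargy.predicates import eq

EXPR = forward()
EXPR.define(or_(
    rule(eq('x')),
    rule(eq('('), EXPR, eq(')')),
))

parser = Parser(EXPR)
print(parser.match('( ( x ) )') is not None)  # expected: True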
Example #39
def test_attribute_const():
    F = fact('F', 'a')
    RULE = rule(
        'январь'
    ).interpretation(
        F.a.const(1)
    )
    parser = Parser(RULE)
    match = parser.match('январь')
    assert match.fact == 1
Example #40
def test_insted_attributes():
    F = fact('F', ['a', 'b'])
    RULE = rule(eq('a').interpretation(F.a)).interpretation(
        F.b).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
Example #41
def test_pipeline():
    RULE = rule(
        pipeline(['a b c', 'b c']),
        'd'
    )
    parser = Parser(RULE)
    assert parser.match('b c d')
    assert parser.match('a b c d')

    RULE = rule(
        pipeline(['a b']).repeatable(),
        'c'
    )
    parser = Parser(RULE)
    assert parser.match('a b a b c')

    RULE = rule(
        caseless_pipeline(['A B']),
        'c'
    )
    parser = Parser(RULE)
    assert parser.match('A b c')

    RULE = morph_pipeline([
        'текст',
        'текст песни',
        'материал',
        'информационный материал',
    ])
    parser = Parser(RULE)
    matches = list(parser.findall('текстом песни музыкальной группы'))
    assert len(matches) == 1
    match = matches[0]
    assert [_.value for _ in match.tokens] == ['текстом', 'песни']

    matches = list(parser.findall('информационного материала под названием'))
    assert len(matches) == 1
    match = matches[0]
    assert [_.value for _ in match.tokens] == ['информационного', 'материала']

    RULE = morph_pipeline(['1 B.'])
    parser = Parser(RULE)
    assert parser.match('1 b .')
Example #42
def test_predicate_attribute():
    F = fact('F', ['a'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 'a'}
Example #43
def test_rule_attribute_custom():
    F = fact('F', ['a'])
    RULE = rule(
        '1'
    ).interpretation(
        F.a
    ).interpretation(
        custom(int)
    )
    parser = Parser(RULE)
    match = parser.match('1')
    assert match.fact == 1
Example #44
def test_inflected_custom():
    MONTHS = {
        'январь': 1
    }
    RULE = rule(
        'январе'
    ).interpretation(
        inflected({'nomn', 'sing'}).custom(MONTHS.get)
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == 1
Example #45
def test_normalized_custom():
    MONTHS = {
        'январь': 1
    }
    RULE = rule(
        'январе'
    ).interpretation(
        normalized().custom(MONTHS.get)
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == 1
Example #46
def test_insted_attributes():
    F = fact('F', ['a', 'b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(
        F.b
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
Example #47
def test_repeatable():
    F = fact('F', [attribute('a').repeatable()])
    RULE = rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
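
The same repeatable-attribute mechanism also works when the rule itself repeats, gathering every captured value into one list. A sketch (assumed imports; the nouns are illustrative):

# Sketch (assumed imports): a repeatable attribute collects all captured values.
from yargy import Parser, rule
from yargy.predicates import gram
from yargy.interpretation import fact, attribute

Items = fact('Items', [attribute('words').repeatable()])

NOUNS = rule(
    gram('NOUN').interpretation(Items.words)
).repeatable().interpretation(Items)

parser = Parser(NOUNS)
match = parser.match('кошка собака')
print(match.fact.words)  # expected: ['кошка', 'собака']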
Example #48
def test_attribute_custom_custom():
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    RULE = rule(
        'A'
    ).interpretation(
        F.a.custom(str.lower).custom(MAPPING.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('A')
    record = match.fact
    assert record == F(a=1)
Example #49
def test_inflected_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {
        'январь': 1
    }
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
Example #50
def test_attribute_normalized():
    F = fact('F', 'a')
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.normalized()
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
Example #51
def test_attribute_custom():
    F = fact('F', 'a')
    RULE = rule(
        '1'
    ).interpretation(
        F.a.custom(int)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('1')
    record = match.fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
Example #52
def test_attribute_inflected():
    F = fact('F', 'a')
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.inflected({'nomn', 'plur'})
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
Example #53
def test_normalized_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {
        'январь': 1
    }
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.normalized().custom(MONTHS.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
Example #54
def test_nested_facts():
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(
        F
    ).interpretation(
        G.b
    ).interpretation(
        G
    )
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}
Example #55
MONTH = and_(
    gte(1),
    lte(12)
).interpretation(
    Date.month.custom(int)
)

DAY = and_(
    gte(1),
    lte(31)
).interpretation(
    Date.day.custom(int)
)

YEAR_WORD = or_(
    rule('г', eq('.').optional()),
    rule(normalized('год'))
)

YEAR = and_(
    gte(1000),
    lte(2100)
).interpretation(
    Date.year.custom(int)
)

YEAR_SHORT = and_(
    length_eq(2),
    gte(0),
    lte(99)
).interpretation(
Example #56
EURO = or_(
    normalized('евро'),
    eq('€')
).interpretation(
    const(dsl.EURO)
)

DOLLARS = or_(
    normalized('доллар'),
    eq('$')
).interpretation(
    const(dsl.DOLLARS)
)

RUBLES = or_(
    rule(normalized('рубль')),
    rule(
        or_(
            caseless('руб'),
            caseless('р'),
            eq('₽')
        ),
        DOT.optional()
    )
).interpretation(
    const(dsl.RUBLES)
)

CURRENCY = or_(
    EURO,
    DOLLARS,
Example #57
    ['number', 'type']
)


DASH = eq('-')
DOT = eq('.')

ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = type('INT')
TITLE = is_title()

ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({
        'я', 'й', 'е',
        'ое', 'ая', 'ий', 'ой'
    })
)


#########
#
#  STRANA
#
##########


# TODO
COUNTRY_VALUE = dictionary({
    'россия',
Example #58
    'агроном',

    'президент',
    'сопрезидент',
    'вице-президент',
    'экс-президент',
    'председатель',
    'руководитель',
    'директор',
    'глава',
])

GENT = gram('gent')

WHERE = or_(
    rule(GENT),
    rule(GENT, GENT),
    rule(GENT, GENT, GENT),
    rule(GENT, GENT, GENT, GENT),
    rule(GENT, GENT, GENT, GENT, GENT),
)

POSITION = or_(
    POSITION,
    rule(POSITION, WHERE)
).interpretation(
    Person.position
)

NAME = NAME.interpretation(
    Person.name
Example #59
    ABBR,
    TITLE
).interpretation(
    Name.middle
).match(gnc)


#########
#
#  FI IF
#
#########


FIRST_LAST = rule(
    FIRST,
    LAST
)

LAST_FIRST = rule(
    LAST,
    FIRST
)


###########
#
#  ABBR
#
###########

Example #60
from yargy.relations import gnc_relation


Location = fact(
    'Location',
    ['name'],
)


gnc = gnc_relation()

REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(Location.name.inflected())

gnc = gnc_relation()

FEDERAL_DISTRICT = rule(
    rule(caseless('северо'), '-').optional(),
    dictionary({
        'центральный',
        'западный',
        'южный',
        'кавказский',
        'приволжский',