def test_pipeline_key():
    """A morph pipeline interpreted with ``normalized()`` yields the listed phrase.

    Both the attribute form (``F.a.normalized()``) and the bare
    ``normalized()`` form are checked against inflected input.
    """
    from parser.parser import morph_pipeline

    phrases = morph_pipeline([
        'закрытое общество',
        'завод'
    ])

    F = fact('F', ['a'])

    # Attribute form: the value lands in ``F.a``.
    attribute_rule = phrases.interpretation(F.a.normalized()).interpretation(F)
    found = Parser(attribute_rule).match('закрытом обществе')
    assert found.fact == F(a='закрытое общество')

    # Bare form: the fact is the plain string.
    bare_rule = phrases.interpretation(normalized())
    found = Parser(bare_rule).match('заводе')
    assert found.fact == 'завод'
def test_type_errors():
    """Reading ``fact`` raises ``TypeError`` for both rules built here.

    In each rule the inner token is interpreted with ``custom(int)``; the
    enclosing rule is then interpreted with ``F.a`` (first case) or
    ``custom(str)`` (second case).
    """
    F = fact('F', ['a'])
    number = eq('1').interpretation(custom(int))
    grammar = rule('a', number).interpretation(F.a)
    found = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        found.fact

    F = fact('F', ['a'])
    number = eq('1').interpretation(custom(int))
    grammar = rule('a', number).interpretation(custom(str))
    found = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        found.fact
def test_pipeline():
    """Exercise ``pipeline``, ``caseless_pipeline`` and ``morph_pipeline`` matching."""
    # Plain pipeline followed by a literal token.
    grammar = rule(pipeline(['a b c', 'b c']), 'd')
    matcher = Parser(grammar)
    assert matcher.match('b c d')
    assert matcher.match('a b c d')

    # Repeatable pipeline.
    grammar = rule(pipeline(['a b']).repeatable(), 'c')
    assert Parser(grammar).match('a b a b c')

    # Caseless pipeline matched against differently-cased input.
    grammar = rule(caseless_pipeline(['A B']), 'c')
    assert Parser(grammar).match('A b c')

    # Morph pipeline with overlapping entries: exactly one match is
    # expected per text, spanning the two-token phrase.
    grammar = morph_pipeline([
        'текст',
        'текст песни',
        'материал',
        'информационный материал',
    ])
    matcher = Parser(grammar)

    found = list(matcher.findall('текстом песни музыкальной группы'))
    assert len(found) == 1
    assert [token.value for token in found[0].tokens] == ['текстом', 'песни']

    found = list(matcher.findall('информационного материала под названием'))
    assert len(found) == 1
    assert [token.value for token in found[0].tokens] == ['информационного', 'материала']

    # Morph pipeline entry containing punctuation.
    grammar = morph_pipeline(['1 B.'])
    assert Parser(grammar).match('1 b .')
def test_person():
    """A position followed by a first and last name is parsed into a nested ``Person`` fact."""
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    LAST = and_(gram('Surn'), not_(gram('Abbr')))
    FIRST = and_(gram('Name'), not_(gram('Abbr')))
    POSITION = morph_pipeline([
        'управляющий директор',
        'вице-мэр'
    ])

    gnc = gnc_relation()

    # Name: first + last, both constrained by the same relation.
    first_part = FIRST.interpretation(Name.first).match(gnc)
    last_part = LAST.interpretation(Name.last).match(gnc)
    NAME = rule(first_part, last_part).interpretation(Name)

    # Person: position + name.
    position_part = POSITION.interpretation(Person.position).match(gnc)
    name_part = NAME.interpretation(Person.name)
    PERSON = rule(position_part, name_part).interpretation(Person)

    found = Parser(PERSON).match('управляющий директор Иван Ульянов')
    assert found
    assert found.fact == Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
def test_const():
    """A rule interpreted with ``const(1)`` produces ``1`` as its fact."""
    grammar = rule('a').interpretation(const(1))
    found = Parser(grammar).match('a')
    assert found.fact == 1
def test_inflected():
    """``inflected`` with the ``nomn``/``femn`` grammemes turns 'московским' into 'московская'."""
    grammar = rule('московским').interpretation(inflected({'nomn', 'femn'}))
    found = Parser(grammar).match('московским')
    assert found.fact == 'московская'
def test_normalized():
    """A rule interpreted with ``normalized()`` yields 'московский' for 'московским'.

    The function was originally named ``text_normalized``; without the
    ``test_`` prefix pytest's default discovery never collected it, so the
    assertion was silently skipped.
    """
    grammar = rule('московским').interpretation(normalized())
    found = Parser(grammar).match('московским')
    assert found.fact == 'московский'


# Backward-compatible alias for the original (misspelled) name; it lacks the
# ``test_`` prefix, so pytest does not collect it a second time.
text_normalized = test_normalized
def test_rule_custom():
    """``custom(float)`` applied to the three-token rule '3', '.', '14' gives ``3.14``."""
    grammar = rule('3', '.', '14').interpretation(custom(float))
    found = Parser(grammar).match('3.14')
    assert found.fact == 3.14
def test_main():
    """Number/gender agreement between a surname+name rule and a verb.

    With the first name wrapped in ``main``, a plural verb is rejected
    while a plural surname is still accepted.
    """
    agreement = and_(number_relation(), gender_relation())

    subject = rule(gram('Surn'), main(gram('Name'))).match(agreement)
    verb = gram('VERB').match(agreement)
    matcher = Parser(rule(subject, verb))

    assert matcher.match('иванов иван стал')
    assert not matcher.match('иванов иван стали')
    assert matcher.match('ивановы иван стал')
def test_attribute_const():
    """``F.a.const(1)`` without an enclosing ``F`` interpretation yields the bare value ``1``."""
    F = fact('F', 'a')
    grammar = rule('январь').interpretation(F.a.const(1))
    found = Parser(grammar).match('январь')
    assert found.fact == 1
def test_attribute():
    """A rule interpreted only with ``F.a`` yields the matched text as its fact."""
    F = fact('F', 'a')
    grammar = rule('a').interpretation(F.a)
    found = Parser(grammar).match('a')
    assert found.fact == 'a'
def test_rule_custom_custom():
    """Chained ``custom`` calls are applied in order: lower-case, then dictionary lookup."""
    mapping = {'a': 1}
    lookup = custom(str.lower).custom(mapping.get)
    grammar = rule('A').interpretation(lookup)
    found = Parser(grammar).match('A')
    assert found.fact == 1
def test_predicate_attribute():
    """A predicate interpreted with ``F.a`` inside a rule interpreted with ``F`` fills the fact.

    Also checks the resulting record's ``spans`` and ``as_json``.
    """
    F = fact('F', ['a'])
    token = eq('a').interpretation(F.a)
    grammar = rule(token).interpretation(F)

    record = Parser(grammar).match('a').fact
    assert record == F(a='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 'a'}
def test_name():
    """First and last names tied by ``gnc_relation`` are inflected consistently.

    The same surname form 'иванову' comes out as 'иванов' or 'иванова'
    depending on the first-name form, and 'сашу ивановой' does not match.
    """
    Name = fact('Name', ['first', 'last'])
    gnc = gnc_relation()

    FIRST = gram('Name').interpretation(Name.first.inflected()).match(gnc)
    LAST = gram('Surn').interpretation(Name.last.inflected()).match(gnc)
    matcher = Parser(rule(FIRST, LAST).interpretation(Name))

    found = matcher.match('саше иванову')
    assert found.fact == Name(first='саша', last='иванов')

    found = matcher.match('сашу иванову')
    assert found.fact == Name(first='саша', last='иванова')

    found = matcher.match('сашу ивановой')
    assert not found
def test_rule_attribute_custom():
    """An ``F.a`` interpretation followed by ``custom(int)`` yields the integer ``1``."""
    F = fact('F', ['a'])
    as_attribute = rule('1').interpretation(F.a)
    grammar = as_attribute.interpretation(custom(int))
    found = Parser(grammar).match('1')
    assert found.fact == 1
def test_inflected_custom():
    """``inflected(...).custom(...)`` inflects 'январе' and then maps it through a dict lookup."""
    months = {
        'январь': 1
    }
    to_month = inflected({'nomn', 'sing'}).custom(months.get)
    grammar = rule('январе').interpretation(to_month)
    found = Parser(grammar).match('январе')
    assert found.fact == 1
def test_normalized_custom():
    """``normalized().custom(...)`` normalizes 'январе' and then maps it through a dict lookup."""
    months = {
        'январь': 1
    }
    to_month = normalized().custom(months.get)
    grammar = rule('январе').interpretation(to_month)
    found = Parser(grammar).match('январе')
    assert found.fact == 1
def test_insted_attributes():
    """An inner ``F.a`` interpretation wrapped by an outer ``F.b`` one fills only ``b``.

    The resulting record has ``a=None`` and ``b='a'``, and ``as_json``
    contains only the ``b`` key.
    """
    F = fact('F', ['a', 'b'])
    inner = eq('a').interpretation(F.a)
    outer = rule(inner).interpretation(F.b)
    grammar = outer.interpretation(F)

    record = Parser(grammar).match('a').fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def test_repeatable():
    """Two tokens interpreted into the same repeatable attribute are collected into a list."""
    F = fact('F', [attribute('a').repeatable()])
    first = eq('a').interpretation(F.a)
    second = eq('b').interpretation(F.a)
    grammar = rule(first, second).interpretation(F)

    record = Parser(grammar).match('a b').fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_attribute_custom_custom():
    """Chained ``custom`` calls on an attribute: lower-case, then dict lookup, stored in ``F.a``."""
    F = fact('F', 'a')
    mapping = {'a': 1}
    lookup = F.a.custom(str.lower).custom(mapping.get)
    grammar = rule('A').interpretation(lookup).interpretation(F)

    found = Parser(grammar).match('A')
    assert found.fact == F(a=1)
def test_normalized_custom_attribute():
    """``F.a.normalized().custom(...)`` stores the dict-lookup result of the normalized word in ``F.a``."""
    F = fact('F', ['a'])
    months = {
        'январь': 1
    }
    to_month = F.a.normalized().custom(months.get)
    grammar = rule('январе').interpretation(to_month).interpretation(F)

    found = Parser(grammar).match('январе')
    assert found.fact == F(a=1)
def test_attribute_inflected():
    """``F.a.inflected({'nomn', 'plur'})`` stores 'январи' for the input 'январе'.

    Also checks the resulting record's ``spans`` and ``as_json``.
    """
    F = fact('F', 'a')
    plural = F.a.inflected({'nomn', 'plur'})
    grammar = rule('январе').interpretation(plural).interpretation(F)

    record = Parser(grammar).match('январе').fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
def test_inflected_custom_attribute():
    """``F.a.inflected(...).custom(...)`` stores the dict-lookup result of the inflected word in ``F.a``."""
    F = fact('F', ['a'])
    months = {
        'январь': 1
    }
    to_month = F.a.inflected({'nomn', 'sing'}).custom(months.get)
    grammar = rule('январе').interpretation(to_month).interpretation(F)

    found = Parser(grammar).match('январе')
    assert found.fact == F(a=1)
def test_attribute_normalized():
    """``F.a.normalized()`` stores 'январь' for the input 'январе'.

    Also checks the resulting record's ``spans`` and ``as_json``.
    """
    F = fact('F', 'a')
    normal_form = F.a.normalized()
    grammar = rule('январе').interpretation(normal_form).interpretation(F)

    record = Parser(grammar).match('январе').fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
def test_attribute_custom():
    """``F.a.custom(int)`` stores the integer ``1`` for the input '1'.

    Also checks the resulting record's ``spans`` and ``as_json``.
    """
    F = fact('F', 'a')
    as_int = F.a.custom(int)
    grammar = rule('1').interpretation(as_int).interpretation(F)

    record = Parser(grammar).match('1').fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
def test_merge_facts():
    """Two sub-rules each interpreted as ``F`` combine into a single ``F`` record.

    One sub-rule fills ``a`` and the other fills ``b``; the enclosing rule
    is itself interpreted as ``F``.
    """
    F = fact('F', ['a', 'b'])

    A = rule(eq('a').interpretation(F.a)).interpretation(F)
    B = rule(eq('b').interpretation(F.b)).interpretation(F)
    grammar = rule(A, B).interpretation(F)

    record = Parser(grammar).match('a b').fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
def test_nested_facts():
    """An ``F`` fact interpreted into ``G.b`` and then ``G`` becomes a nested record.

    Also checks the outer record's ``spans`` and its nested ``as_json``.
    """
    F = fact('F', ['a'])
    G = fact('G', ['b'])

    inner = rule(eq('a').interpretation(F.a)).interpretation(F)
    grammar = inner.interpretation(G.b).interpretation(G)

    record = Parser(grammar).match('a').fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}