def test_pipeline_key():
    """Pipeline matches normalize back to the dictionary key form."""
    from yargy.pipelines import morph_pipeline

    pipeline = morph_pipeline(['закрытое общество', 'завод'])
    F = fact('F', ['a'])

    RULE = pipeline.interpretation(F.a.normalized()).interpretation(F)
    match = Parser(RULE).match('закрытом обществе')
    assert match.fact == F(a='закрытое общество')

    RULE = pipeline.interpretation(normalized())
    match = Parser(RULE).match('заводе')
    assert match.fact == 'завод'
def show_matches(rule, *lines):
    """Run *rule* over every line and render the match spans as markup."""
    parser = Parser(rule)
    for text in lines:
        show_markup(text, [match.span for match in parser.findall(text)])
def test_type_errors():
    """Nesting custom() output inside another interpretation raises TypeError."""
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(F.a)
    match = Parser(RULE).match('a 1')
    with pytest.raises(TypeError):
        match.fact

    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(custom(int)),
    ).interpretation(custom(str))
    match = Parser(RULE).match('a 1')
    with pytest.raises(TypeError):
        match.fact
def __init__(self, logger=None, env='local'):
    """Set up logging, a morph tokenizer and the OGRN parsers.

    A rotating-file DEBUG logger is created unless an external logger
    is supplied by the caller.
    """
    self.env = env
    if logger is not None:
        self.logger = logger
    else:
        self.logger = logging.getLogger("OGRNExtractor")
        self.logger.setLevel(logging.DEBUG)
        handler = RotatingFileHandler(
            "ogrn_extractor.log",
            mode='a',
            encoding='utf-8',
            backupCount=5,
            maxBytes=1 * 1024 * 1024,  # rotate at 1 MiB
        )
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        self.logger.addHandler(handler)

    self.tokenizer = MorphTokenizer()
    # An OGRN mention is a keyword followed by an integer token.
    OGRN = morph_pipeline([
        'огрн',
        'основной государственный регистрационный номер',
        'огрнип'
    ])
    INT = type('INT')
    self.full_ogrn_parser = Parser(rule(OGRN, INT))
    self.ogrn_num_parser = Parser(rule(INT))
def test_inflected_custom():
    """inflected() chained with custom() maps the normal form through a dict."""
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(
        inflected({'nomn', 'sing'}).custom(MONTHS.get)
    )
    assert Parser(RULE).match('январе').fact == 1
def __init__(self, names=None, version_numbers=None, version_names=None,
             consoles=None):
    """Build a parser recognising a game name plus optional version/console.

    Defect fixed: all four parameters used shared mutable default lists
    (``names: list = []`` etc.), which are created once at definition time
    and aliased across every call; replaced with ``None`` sentinels.

    :param names: surface forms of the game name; the first entry is used
        as the canonical name constant.
    :param version_numbers: optional version-number phrases.
    :param version_names: optional version-name phrases.
    :param consoles: optional console phrases.
    """
    names = [] if names is None else names
    version_numbers = [] if version_numbers is None else version_numbers
    version_names = [] if version_names is None else version_names
    consoles = [] if consoles is None else consoles

    rules = rule(
        morph_pipeline(names).interpretation(self.__game.name.const(names[0])),
        morph_pipeline(version_numbers).interpretation(self.__game.version_number).optional(),
        morph_pipeline(version_names).interpretation(self.__game.version_name).optional(),
        morph_pipeline(consoles).interpretation(self.__game.console).optional(),
    )
    game = or_(rules).interpretation(self.__game)
    self.parser = Parser(game)
def test_samples(rules: Union[NamedRule, List[NamedRule]],
                 texts: List[str],
                 num: int = 20,
                 seed=None,
                 markup=None,
                 fact=False):
    """Sample up to *num* texts, run each rule over them, display the markup.

    Fixes: ``seed`` was annotated ``int`` while defaulting to ``None``
    (annotation removed); the two chained ``isinstance`` checks are
    collapsed into one tuple form.

    :param rules: a single named rule or a list/tuple of them.
    :param texts: candidate texts; a random sample of *num* is taken.
    :param seed: optional random seed for reproducible sampling.
    :param markup: markup renderer; defaults to BoxLabelMarkup.
    :param fact: when True, also display each match's fact.
    """
    from random import seed as set_seed, sample

    set_seed(seed)
    # Sample only when there are enough texts; otherwise shrink num.
    if len(texts) < num:
        num = len(texts)
    else:
        texts = sample(texts, num)

    if not isinstance(rules, (list, tuple)):
        rules = [rules]

    results: Dict[int, Dict[int, List]] = defaultdict(dict)
    for rule_idx, rule in enumerate(rules):
        parser = Parser(rule)
        for text_idx in range(num):
            results[text_idx][rule_idx] = list(parser.findall(texts[text_idx]))

    for text_idx, rule_matches in results.items():
        spans = [
            (m.span[0], m.span[1], str(rules[rule_idx].name))
            for rule_idx, matches in rule_matches.items()
            for m in matches
        ]
        show_markup(texts[text_idx], spans, markup or BoxLabelMarkup)
        if fact:
            for matches in rule_matches.values():
                for m in matches:
                    display(m.fact)
def test(rule, *lines, tree=False, facts=False):
    """Run *rule* over each line and render matched vs. expected spans.

    Each line is either a plain string (no expected matches) or a tuple
    ``(text, substr, ...)`` whose substrings mark the expected spans.

    Fix: the span-membership lambda took a parameter named ``set``,
    shadowing the builtin; replaced with a named helper.
    """
    def is_at(span, spans):
        # True when `span` equals any span in `spans`.
        return any(span == s for s in spans)

    parser = Parser(rule)
    for line in lines:
        if isinstance(line, str):
            text, expected = line, []
        else:
            text = line[0]
            expected = [find(text, substr) for substr in line[1:]]
        matches = list(sorted(parser.findall(text), key=lambda _: _.span))
        matched_spans = [_.span for _ in matches]
        # Blue = expected hit, red = unexpected hit, grey = expected miss.
        spans = [
            (s[0], s[1], '#aec7e8' if is_at(s, expected) else '#ff9896')
            for s in matched_spans
        ] + [
            (s[0], s[1], '#ccc')
            for s in expected
            if not is_at((s[0], s[1]), matched_spans)
        ]
        show_markup(text, [s for s in spans if s[0] < s[1]], LineMarkup)
        if matches:
            for _ in matches:
                if tree:
                    # NOTE(review): this always displays the FIRST match's
                    # tree on every iteration; `_.tree` looks intended —
                    # behavior kept as-is, confirm with the author.
                    display(matches[0].tree.as_dot)
                if facts:
                    display(_.fact)
def predict(self, input):
    """Fill an Address with city, street, building and appartment parts
    parsed from *input*; missing parts are left unset."""
    address = Address()

    # City
    found = list(Parser(CityFilter).findall(input))
    if found:
        parsed = found[0].fact
        address.city = (parsed.title, parsed.prefix)

    # Street
    found = list(Parser(StreetFilter).findall(input))
    if found:
        parsed = found[0].fact
        address.street = (parsed.title, parsed.prefix)

    # Building
    found = list(Parser(BuildingFilter).findall(input))
    if found:
        parsed = found[0].fact
        address.building = (parsed.house, parsed.corpus, parsed.structure)

    # Appartment
    found = list(Parser(AppartmentFilter).findall(input))
    if found:
        address.appartment = found[0].fact.appartment

    return address
def test_name():
    """First/last name agree through gnc_relation and are inflected."""
    Name = fact('Name', ['first', 'last'])
    gnc = gnc_relation()
    FIRST = gram('Name').interpretation(Name.first.inflected()).match(gnc)
    LAST = gram('Surn').interpretation(Name.last.inflected()).match(gnc)
    parser = Parser(rule(FIRST, LAST).interpretation(Name))

    assert parser.match('саше иванову').fact == Name(first='саша', last='иванов')
    assert parser.match('сашу иванову').fact == Name(first='саша', last='иванова')
    # Disagreeing gender/case must not match at all.
    assert not parser.match('сашу ивановой')
def test_person():
    """Position + full name parse into a nested Person fact."""
    Name = fact('Name', ['first', 'last'])
    Person = fact('Person', ['position', 'name'])

    LAST = and_(gram('Surn'), not_(gram('Abbr')))
    FIRST = and_(gram('Name'), not_(gram('Abbr')))
    POSITION = morph_pipeline(['управляющий директор', 'вице-мэр'])

    gnc = gnc_relation()
    NAME = rule(
        FIRST.interpretation(Name.first).match(gnc),
        LAST.interpretation(Name.last).match(gnc),
    ).interpretation(Name)
    PERSON = rule(
        POSITION.interpretation(Person.position).match(gnc),
        NAME.interpretation(Person.name),
    ).interpretation(Person)

    match = Parser(PERSON).match('управляющий директор Иван Ульянов')
    assert match
    assert match.fact == Person(
        position='управляющий директор',
        name=Name(first='Иван', last='Ульянов'),
    )
def test_inflected_custom_attribute():
    """F.a.inflected().custom() stores the mapped value on the fact."""
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)
    ).interpretation(F)
    assert Parser(RULE).match('январе').fact == F(a=1)
def test_normalized_custom_attribute():
    """F.a.normalized().custom() lemmatizes then maps the value."""
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(
        F.a.normalized().custom(MONTHS.get)
    ).interpretation(F)
    assert Parser(RULE).match('январе').fact == F(a=1)
def __init__(self, rule, morph):
    """Initialize the parser with a morph-aware tokenizer.

    `morph` wraps a pymorphy subclass that adds check_gram/normalized
    methods and caches its parse calls.
    """
    analyzer = MorphAnalyzer(morph)
    YargyParser.__init__(self, rule, tokenizer=MorphTokenizer(morph=analyzer))
class OGRNExtractor:
    """Extract OGRN / OGRNIP registration numbers from free text.

    A full match is an OGRN keyword followed by an integer token; the
    integer part is then re-extracted and deduplicated.
    """

    def __init__(self, logger=None, env='local'):
        """Set up logging (a rotating-file DEBUG logger unless one is
        supplied), a tokenizer and the two parsers."""
        self.env = env
        if logger is None:
            self.logger = logging.getLogger("OGRNExtractor")
            self.logger.setLevel(logging.DEBUG)
            handler = RotatingFileHandler(
                "ogrn_extractor.log",
                mode='a',
                encoding='utf-8',
                backupCount=5,
                maxBytes=1 * 1024 * 1024,  # rotate at 1 MiB
            )
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
        else:
            self.logger = logger
        self.tokenizer = MorphTokenizer()
        OGRN = morph_pipeline([
            'огрн',
            'основной государственный регистрационный номер',
            'огрнип'
        ])
        INT = type('INT')
        self.full_ogrn_parser = Parser(rule(OGRN, INT))
        self.ogrn_num_parser = Parser(rule(INT))

    def preprocess(self, line):
        """Flatten newlines and decode HTML-escaped double quotes.

        Defect fixed: the second replace's literal was garbled by an
        HTML-entity round-trip (it appeared as a bare run of quotes, which
        is not valid Python); reconstructed as the ``&quot;`` -> '"'
        replacement. NOTE(review): confirm against the source repository.
        """
        return line.replace("\n", " ").replace("&quot;", "\"")

    def extract(self, line):
        """Return the unique OGRN number strings found in *line*."""
        line = self.preprocess(line)
        result = []
        for span in (m.span for m in self.full_ogrn_parser.findall(line)):
            fragment = line[span.start:span.stop]
            # Pull just the integer out of each keyword+number fragment.
            for int_span in (m.span for m in self.ogrn_num_parser.findall(fragment)):
                result.append(fragment[int_span.start:int_span.stop])
        # Deduplicate; order is unspecified (matches the original code).
        return list(set(result))

    def show_tokens(self, line):
        """Tokenize *line* after the same preprocessing as extract()."""
        return list(self.tokenizer(self.preprocess(line)))
def test_const():
    """const() yields the fixed value regardless of the matched text."""
    RULE = rule('a').interpretation(const(1))
    assert Parser(RULE).match('a').fact == 1
def test_inflected():
    """inflected() re-inflects the match into the requested grammemes."""
    RULE = rule('московским').interpretation(inflected({'nomn', 'femn'}))
    assert Parser(RULE).match('московским').fact == 'московская'
def test_normalized():
    """normalized() reduces the match to its dictionary (normal) form.

    Defect fixed: the function was named ``text_normalized``, so pytest
    never collected or ran it; renamed with the ``test_`` prefix.
    """
    RULE = rule('московским').interpretation(normalized())
    assert Parser(RULE).match('московским').fact == 'московский'
def test_rule_custom():
    """custom() applies a callable to the whole matched span."""
    RULE = rule('3', '.', '14').interpretation(custom(float))
    assert Parser(RULE).match('3.14').fact == 3.14
def test_attribute_custom_custom():
    """Chained custom() calls on an attribute compose left to right."""
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    RULE = rule('A').interpretation(
        F.a.custom(str.lower).custom(MAPPING.get)
    ).interpretation(F)
    assert Parser(RULE).match('A').fact == F(a=1)
def test_attribute_custom():
    """custom() on an attribute converts the value before fact assembly."""
    F = fact('F', 'a')
    RULE = rule('1').interpretation(F.a.custom(int)).interpretation(F)
    record = Parser(RULE).match('1').fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
def test_tagger():
    """A custom tagger restricts which tokens the tag() rule can cover."""
    text = 'a b c d e f g'
    parser = Parser(tag('I').repeatable(), tagger=MyTagger())
    spans = (match.span for match in parser.findall(text))
    assert [text[start:stop] for start, stop in spans] == ['b c', 'e f']
def test_attribute_normalized():
    """normalized() on an attribute stores the lemma in the fact."""
    F = fact('F', 'a')
    RULE = rule('январе').interpretation(F.a.normalized()).interpretation(F)
    record = Parser(RULE).match('январе').fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
def test_rule_attribute():
    """Interpreting a whole rule as an attribute keeps the raw span text."""
    F = fact('F', ['a'])
    RULE = rule('a', 'A').interpretation(F.a).interpretation(F)
    record = Parser(RULE).match('a A').fact
    assert record == F(a='a A')
    assert record.spans == [(0, 5)]
    assert record.as_json == {'a': 'a A'}
def find_factors(factor_types):
    """Scan the module-level `text` for each factor group.

    Appends each match's span to the module-level `factors_span` list and
    the 1-based group index to the module-level `factors` list when the
    group produced any match.
    """
    for index, factor_group in enumerate(factor_types, start=1):
        hits = []
        parser = Parser(morph_pipeline(factor_group))
        for match in parser.findall(text):
            hits.append(' '.join(token.value for token in match.tokens))
            factors_span.append(match.span)
        if hits:
            factors.append(index)
def test_constant_attribute():
    """const() pins an attribute to a fixed value inside a money rule."""
    MONEY_RULE = rule(
        gram('INT').interpretation(Money.count),
        dictionary({'тысяча'}).interpretation(Money.base.const(10**3)),
        dictionary({'рубль', 'доллар'}).interpretation(Money.currency),
    ).interpretation(Money)
    matches = list(Parser(MONEY_RULE).match('1 тысяча рублей'))
    assert matches[0].fact == Money(count=1, base=1000, currency='рублей')
def test_rule_custom_custom():
    """Chained custom() calls on a rule compose left to right."""
    MAPPING = {'a': 1}
    RULE = rule('A').interpretation(custom(str.lower).custom(MAPPING.get))
    assert Parser(RULE).match('A').fact == 1
def test_attribute_const():
    """const() on an attribute yields the constant value directly."""
    F = fact('F', 'a')
    RULE = rule('январь').interpretation(F.a.const(1))
    assert Parser(RULE).match('январь').fact == 1
def test_attribute():
    """An attribute interpretation alone yields the matched text."""
    F = fact('F', 'a')
    RULE = rule('a').interpretation(F.a)
    assert Parser(RULE).match('a').fact == 'a'
def test_insted_attributes():
    """An outer attribute interpretation overrides the inner capture."""
    F = fact('F', ['a', 'b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F.b).interpretation(F)
    record = Parser(RULE).match('a').fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def test_attribute_inflected():
    """inflected() on an attribute stores the re-inflected form."""
    F = fact('F', 'a')
    RULE = rule('январе').interpretation(
        F.a.inflected({'nomn', 'plur'})
    ).interpretation(F)
    record = Parser(RULE).match('январе').fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
def test_repeatable():
    """A repeatable attribute accumulates every captured value."""
    F = fact('F', [attribute('a').repeatable()])
    RULE = rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a),
    ).interpretation(F)
    record = Parser(RULE).match('a b').fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_predicate_attribute():
    """A predicate interpreted as an attribute fills the fact field."""
    F = fact('F', ['a'])
    RULE = rule(eq('a').interpretation(F.a)).interpretation(F)
    record = Parser(RULE).match('a').fact
    assert record == F(a='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 'a'}
def test_inflected_custom():
    """custom() after inflected() receives the re-inflected value."""
    MONTHS = {'январь': 1}
    parser = Parser(rule('январе').interpretation(
        inflected({'nomn', 'sing'}).custom(MONTHS.get)))
    match = parser.match('январе')
    assert match.fact == 1
def test_tagger():
    """Only the token runs tagged 'I' by MyTagger are matched."""
    text = 'a b c d e f g'
    parser = Parser(tag('I').repeatable(), tagger=MyTagger())
    found = [match.span for match in parser.findall(text)]
    assert [text[a:b] for a, b in found] == ['b c', 'e f']
def test_normalized_custom():
    """custom() after normalized() receives the lemma."""
    MONTHS = {'январь': 1}
    RULE = rule('январе').interpretation(normalized().custom(MONTHS.get))
    assert Parser(RULE).match('январе').fact == 1
def test_rule_attribute_custom():
    """custom() applied after an attribute interpretation converts it."""
    F = fact('F', ['a'])
    RULE = rule('1').interpretation(F.a).interpretation(custom(int))
    assert Parser(RULE).match('1').fact == 1
def test_insted_attributes():
    """The outer F.b interpretation wins over the inner F.a capture."""
    F = fact('F', ['a', 'b'])
    inner = eq('a').interpretation(F.a)
    parser = Parser(rule(inner).interpretation(F.b).interpretation(F))
    record = parser.match('a').fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def test_attribute_custom_custom():
    """Two chained custom() transforms on one attribute compose in order."""
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    chain = F.a.custom(str.lower).custom(MAPPING.get)
    parser = Parser(rule('A').interpretation(chain).interpretation(F))
    record = parser.match('A').fact
    assert record == F(a=1)
def test_repeatable():
    """Values captured by a repeatable attribute are collected in order."""
    F = fact('F', [attribute('a').repeatable()])
    parser = Parser(rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a),
    ).interpretation(F))
    record = parser.match('a b').fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_attribute_inflected():
    """'январе' re-inflects to nominative plural 'январи' on the fact."""
    F = fact('F', 'a')
    parser = Parser(rule('январе').interpretation(
        F.a.inflected({'nomn', 'plur'})).interpretation(F))
    record = parser.match('январе').fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
def test_attribute_normalized():
    """'январе' is stored as its lemma 'январь' on the fact."""
    F = fact('F', 'a')
    parser = Parser(rule('январе').interpretation(
        F.a.normalized()).interpretation(F))
    record = parser.match('январе').fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
def test_attribute_custom():
    """The string '1' becomes int 1 via F.a.custom(int)."""
    F = fact('F', 'a')
    parser = Parser(rule('1').interpretation(
        F.a.custom(int)).interpretation(F))
    record = parser.match('1').fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
def test_normalized_custom_attribute():
    """normalized().custom() on an attribute lemmatizes then maps."""
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    parser = Parser(rule('январе').interpretation(
        F.a.normalized().custom(MONTHS.get)).interpretation(F))
    assert parser.match('январе').fact == F(a=1)
def test_inflected_custom_attribute():
    """inflected().custom() on an attribute re-inflects then maps."""
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    parser = Parser(rule('январе').interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)).interpretation(F))
    assert parser.match('январе').fact == F(a=1)
def test_merge_facts():
    """Two partial facts produced by sub-rules merge into one record."""
    F = fact('F', ['a', 'b'])
    A = rule(eq('a').interpretation(F.a)).interpretation(F)
    B = rule(eq('b').interpretation(F.b)).interpretation(F)
    parser = Parser(rule(A, B).interpretation(F))
    record = parser.match('a b').fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
class Extractor(object):
    """Callable wrapper: normalize the text, run the parser, wrap matches."""

    def __init__(self, rule, tokenizer=TOKENIZER, tagger=None):
        # Build the parser once; reused for every call.
        self.parser = Parser(rule, tokenizer=tokenizer, tagger=tagger)

    def __call__(self, text):
        """Return a Matches wrapper over all matches in the normalized text."""
        normalized = normalize_text(text)
        return Matches(normalized, self.parser.findall(normalized))
def test_nested_facts():
    """A fact assigned to another fact's attribute nests in the result."""
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F).interpretation(G.b).interpretation(G)
    record = Parser(RULE).match('a').fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}
def __init__(self, rule, tokenizer=TOKENIZER, tagger=None):
    # Build the underlying yargy Parser once with the given rule, the
    # module-level default tokenizer, and an optional tagger.
    self.parser = Parser(rule, tokenizer=tokenizer, tagger=tagger)
def test_pipeline():
    """pipeline / caseless_pipeline / morph_pipeline matching behaviour."""
    # Exact-form pipeline.
    parser = Parser(rule(pipeline(['a b c', 'b c']), 'd'))
    assert parser.match('b c d')
    assert parser.match('a b c d')

    # Repeatable pipeline entry.
    parser = Parser(rule(pipeline(['a b']).repeatable(), 'c'))
    assert parser.match('a b a b c')

    # Case-insensitive pipeline.
    parser = Parser(rule(caseless_pipeline(['A B']), 'c'))
    assert parser.match('A b c')

    # Morphological pipeline prefers the longest entry.
    parser = Parser(morph_pipeline([
        'текст',
        'текст песни',
        'материал',
        'информационный материал',
    ]))
    matches = list(parser.findall('텍текстом песни музыкальной группы'.replace('텍', '')))
    assert len(matches) == 1
    assert [token.value for token in matches[0].tokens] == ['текстом', 'песни']

    matches = list(parser.findall('информационного материала под названием'))
    assert len(matches) == 1
    assert [token.value for token in matches[0].tokens] == \
        ['информационного', 'материала']

    assert Parser(morph_pipeline(['1 B.'])).match('1 b .')