def test_const():
    """Check that a rule interpreted with ``const(1)`` yields the fact ``1``."""
    grammar = rule('a').interpretation(const(1))
    found = Parser(grammar).match('a')
    assert found.fact == 1
def _abbreviate(word: str, abbrs: List[str], opt=False):
    """Build a rule that accepts ``word`` or one of its abbreviations.

    Three alternatives are combined with ``or_``:

    * the full word, wrapped in ``normalized``;
    * any abbreviation containing ``'-'``, split on the dash and matched
      piece by piece (dashes included) through ``caseless``;
    * any other abbreviation through ``caseless``, optionally followed
      by a ``'.'`` token.

    Whatever alternative matches is interpreted as the constant ``word``.

    :param word: full form; also the value the rule is interpreted as.
    :param abbrs: abbreviations of ``word``.
    :param opt: when true, the returned rule is wrapped in ``.optional()``.
    :return: the combined rule.
    """
    # NOTE(review): relies on ``partition`` returning the items that fail
    # the predicate first and the ones that satisfy it second -- confirm
    # against the imported helper.
    plain, hyphenated = partition(lambda abbr: '-' in abbr, abbrs)
    hyphenated_rules = (
        rule(*(caseless(piece) for piece in intersperse('-', item.split('-'))))
        for item in hyphenated
    )

    full_word = rule(normalized(word))
    hyphenated_alternatives = rule(or_(*hyphenated_rules))
    dotted_abbreviation = rule(
        or_(*(caseless(item) for item in plain)),
        eq('.').optional(),
    )

    alternatives = or_(full_word, hyphenated_alternatives, dotted_abbreviation)
    matcher = alternatives.interpretation(interpretation.const(word))
    if opt:
        return matcher.optional()
    return matcher
def make_rule_from_station(title: str) -> Rule:
    """Build the rule that recognises a station by its title.

    The title is cleaned first: the characters ``'1'`` and ``'2'`` are
    removed, then it is lower-cased and stripped. Each space-separated
    token becomes a sub-rule -- the ``Abbrs`` entry when ``Abbrs.is_abbr``
    accepts the token, otherwise ``normalized(token)`` interpreted as the
    token itself. The collected tokens are joined with spaces and passed
    through ``Restore.get`` to produce ``StationTitle.value``.

    If ``Synonyms`` has an entry for the cleaned title, the returned rule
    accepts either that synonym or the token phrase.

    :param title: station title as given by the caller.
    :return: rule whose interpretation fills ``StationTitle.value``.
    """
    title = title.replace('1', '').replace('2', '').lower().strip()

    def token_rule(token):
        # Known abbreviations come with their own rule; everything else
        # is matched in normalized form and mapped back to the raw token.
        if Abbrs.is_abbr(token):
            return Abbrs.get(token)
        return normalized(token).interpretation(meaning.const(token))

    parts = [
        token_rule(token).interpretation(Array.element)
        for token in title.split(' ')
    ]
    restore_joined = meaning.custom(lambda p: Restore.get(' '.join(p.element)))
    phrase = (
        rule(*parts)
        .means(Array)
        .interpretation(restore_joined)
        .means(StationTitle.value)
    )

    if not Synonyms.has(title):
        return phrase

    restore_plain = meaning.custom(lambda p: Restore.get(p))
    synonym = (
        Synonyms.get(title)
        .interpretation(restore_plain)
        .means(StationTitle.value)
    )
    return or_(synonym, phrase)
attribute('currency', '-'), attribute('multiplier', -1), attribute('period', '-') ]) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_(normalized('евро'), normalized('euro'), eq('€'), caseless('EUR')).interpretation(const('EUR')) DOLLARS = or_(normalized('доллар'), normalized('дол'), normalized('dollar'), eq('$'), caseless('USD')).interpretation(const('USD')) RUBLES = or_( rule(normalized('ruble')), rule(normalized('рубль')), rule(normalized('рубл')), rule( or_( caseless('руб'), caseless('rub'), # caseless('rur'), caseless('р'), eq('₽')),
#
#   CURRENCY
#
##########

# Each currency rule is interpreted as the matching ``dsl`` constant.
EURO = or_(
    normalized('евро'),
    eq('€'),
    eq('EUR'),
).interpretation(
    const(dsl.EURO)
)

DOLLARS = or_(
    normalized('доллар'),
    eq('$'),
    eq('USD'),
).interpretation(
    const(dsl.DOLLARS)
)

# Rubles: the full word, or a short form with an optional trailing dot.
RUBLES = or_(
    rule(normalized('рубль')),
    rule(
        or_(
            caseless('руб'),
            caseless('р'),
            eq('₽'),
        ),
        DOT.optional(),
    ),
).interpretation(
    const(dsl.RUBLES)
)

CURRENCY = or_(
    EURO,
    DOLLARS,
    RUBLES,
).interpretation(
    Money.currency
)

# TODO: kopecks and cents could also be dropped to speed things up
KOPEIKA = or_(
    rule(normalized('копейка')),
    rule(
        or_(
            caseless('коп'),
            caseless('к'),
        ),
        DOT.optional(),
    ),
)

CENT = or_(
    normalized('цент'),
    eq('¢'),
)
'двести': 200, 'триста': 300, 'четыреста': 400, 'пятьсот': 500, 'шестьсот': 600, 'семьсот': 700, 'восемьсот': 800, 'девятьсот': 900, 'тысяча': 10**3, 'миллион': 10**6, 'миллиард': 10**9, 'триллион': 10**12, } DOT = eq('.') INT = type('INT') THOUSANDTH = rule(caseless_pipeline(['тысячных', 'тысячная'])).interpretation(const(10**-3)) HUNDREDTH = rule(caseless_pipeline(['сотых', 'сотая'])).interpretation(const(10**-2)) TENTH = rule(caseless_pipeline(['десятых', 'десятая'])).interpretation(const(10**-1)) THOUSAND = or_( rule(caseless('т'), DOT), rule(caseless('тыс'), DOT.optional()), rule(normalized('тысяча')), rule(normalized('тыща')) ).interpretation(const(10**3)) MILLION = or_( rule(caseless('млн'), DOT.optional()), rule(normalized('миллион')) ).interpretation(const(10**6)) MILLIARD = or_( rule(caseless('млрд'), DOT.optional()), rule(normalized('миллиард'))
max = self.max.normalized if not min.currency: min.currency = max.currency return dsl.Range(min, max) DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_(normalized('евро'), eq('€')).interpretation(const(dsl.EURO)) DOLLARS = or_(normalized('доллар'), eq('$')).interpretation(const(dsl.DOLLARS)) RUBLES = or_( rule(normalized('рубль')), rule(or_(caseless('руб'), caseless('р'), eq('₽')), DOT.optional())).interpretation(const(dsl.RUBLES)) CURRENCY = or_(EURO, DOLLARS, RUBLES).interpretation(Money.currency) KOPEIKA = or_(rule(normalized('копейка')), rule(or_(caseless('коп'), caseless('к')), DOT.optional())) CENT = or_(normalized('цент'), eq('¢'))
'триста': 300, 'четыреста': 400, 'пятьсот': 500, 'шестьсот': 600, 'семьсот': 700, 'восемьсот': 800, 'девятьсот': 900, 'тысяча': 10**3, 'миллион': 10**6, 'миллиард': 10**9, 'триллион': 10**12, } DOT = eq('.') INT = type('INT') THOUSANDTH = rule(caseless_pipeline(['тысячных', 'тысячная' ])).interpretation(const(10**-3)) HUNDREDTH = rule(caseless_pipeline(['сотых', 'сотая'])).interpretation(const(10**-2)) TENTH = rule(caseless_pipeline(['десятых', 'десятая'])).interpretation(const(10**-1)) THOUSAND = or_(rule(caseless('т'), DOT), rule(caseless('тыс'), DOT.optional()), rule(normalized('тысяча')), rule(normalized('тыща'))).interpretation(const(10**3)) MILLION = or_(rule(caseless('млн'), DOT.optional()), rule(normalized('миллион'))).interpretation(const(10**6)) MILLIARD = or_(rule(caseless('млрд'), DOT.optional()), rule(normalized('миллиард'))).interpretation(const(10**9)) TRILLION = or_(rule(caseless('трлн'), DOT.optional()), rule(normalized('триллион'))).interpretation(const(10**12)) MULTIPLIER = or_(THOUSANDTH, HUNDREDTH, TENTH, THOUSAND, MILLION, MILLIARD, TRILLION).interpretation(Number.multiplier)
def _synonymize(word: str, syns: List[str]) -> Rule:
    """Build a rule matching any synonym and interpreted as ``word``.

    :param word: value the resulting rule is interpreted as.
    :param syns: synonyms; each one is wrapped in ``normalized``.
    :return: rule over the ``or_`` of all synonyms.
    """
    alternatives = or_(*(normalized(syn) for syn in syns))
    return rule(alternatives).interpretation(interpretation.const(word))
DOT = eq('.') INT = type('INT') ######## # # CURRENCY # ########## EURO = or_( normalized('евро'), eq('€') ).interpretation( const(dsl.EURO) ) DOLLARS = or_( normalized('доллар'), eq('$') ).interpretation( const(dsl.DOLLARS) ) RUBLES = or_( rule(normalized('рубль')), rule( or_( caseless('руб'), caseless('р'),