示例#1
0
    POSELOK_WORDS,
    POSELOK_NAME
).interpretation(
    Settlement
)


##############
#
#   ADDRESS PERSON
#
############


# Candidate initial: a token of length 1 that is title-cased.
ABBR = and_(length_eq(1), is_title())

# Token matching TITLE (defined elsewhere) that also carries the
# 'Name' or 'Surn' grammeme.
PART = and_(TITLE, or_(gram('Name'), gram('Surn')))

MAYBE_FIO = or_(
    rule(TITLE, PART),
    rule(PART, TITLE),
    rule(ABBR, '.', TITLE),
示例#2
0
)

# Year marker: 'г' with an optional trailing '.', or a token that
# normalizes to 'год'.
YEAR_WORD = or_(rule('г', eq('.').optional()), rule(normalized('год')))

# Value in 1000..2100, stored in Date.year as an int.
YEAR = and_(gte(1000), lte(2100)).interpretation(Date.year.custom(int))

# Two-character value in 0..99, stored in Date.year as 1900 + value.
YEAR_SHORT = and_(length_eq(2), gte(0), lte(99)).interpretation(
    Date.year.custom(lambda short: 1900 + int(short))
)

# Value in 1..100000, stored in Date.year as an int.
ERA_YEAR = and_(gte(1), lte(100000)).interpretation(Date.year.custom(int))

ERA_WORD = rule(
    eq('до'),
示例#3
0
# Either MILLION or THOUSAND (both defined elsewhere), recorded in
# Money.multiplier.
MULTIPLIER = or_(
    MILLION,
    THOUSAND
).interpretation(
    Money.multiplier
)

#######
#
#   AMOUNT
#
########


def normalize_integer(value):
    """Strip group separators from a matched integer string.

    A whitespace character, '.' or ',' is removed only when it is directly
    followed by a non-whitespace character or sits at the very end of the
    string (optionally followed by one final whitespace character).
    A separator followed by inner whitespace is kept, so '5,000' becomes
    '5000' while '5, 000' becomes '5,000'.

    Returns the cleaned string; no conversion to ``int`` is performed.
    """
    # Raw strings: '\s', '\.' and '\g' are invalid escape sequences in a
    # plain literal (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
    return re.sub(r'[\s\.,](\S|$|\s$)', r'\g<1>', value)


# INT token of length 3: one thousands group.
PART = and_(
    INT,
    length_eq(3)
)

# Separator token: its value must be contained in ',.'.
SEP = in_(',.')

# An INT followed by up to two 3-character groups, either bare or each
# preceded by SEP.
INTEGER = or_(
    rule(INT),
    rule(INT, PART),
    rule(INT, PART, PART),
    rule(INT, SEP, PART),
    rule(INT, SEP, PART, SEP, PART)
)

# Known limitation. Sample input:
#   *Вилка*: 150к-250к (примерно 50-100, 120-180, 180-250 Junior/Middle/Senior) gross + премия 20% годового дохода по KPI
# This yields 150-250, 50-100120 and 180-180250: ',' is in SEP (needed for '5,000') and yargy ignores spaces,
# so "100, 120" is read as one number. If we dropped ',' from SEP, we would no longer be able to catch
# samples like '5,000'.
示例#4
0
}

# Month given by name: a MONTHS entry, normalized and then looked up in
# MONTHS to obtain the value stored in Date.month.
MONTH_NAME = dictionary(MONTHS).interpretation(
    Date.month.normalized().custom(MONTHS.__getitem__)
)

# Numeric month, 1..12.
MONTH = and_(
    gte(1),
    lte(12)
).interpretation(
    Date.month.custom(int)
)

# Numeric day, 1..31.
DAY = and_(
    gte(1),
    lte(31)
).interpretation(
    Date.day.custom(int)
)

# Any value >= 1.
# NOTE(review): stored in Date.day, same as DAY -- confirm this is intended.
COUNT = and_(
    gte(1)
).interpretation(
    Date.day.custom(int)
)

# Year marker: 'г' with an optional trailing '.', or a token that
# normalizes to 'год'.
YEAR_WORD = or_(
    rule('г', eq('.').optional()),
    rule(normalized('год'))
)

# Value in 1000..2100, stored in Date.year as an int.
YEAR = and_(
    gte(1000),
    lte(2100)
).interpretation(
    Date.year.custom(int)
)

# Two-character value in 0..99, stored in Date.year as 2000 + value.
YEAR_SHORT = and_(
    length_eq(2),
    gte(0),
    lte(99)
).interpretation(
    Date.year.custom(lambda short: 2000 + int(short))
)

# Value in 1..100000, stored in Date.year as an int.
ERA_YEAR = and_(
    gte(1),
    lte(100000)
).interpretation(
    Date.year.custom(int)
)

# 'до' followed by either 'н', '.', 'э' (final '.' optional) or two tokens
# normalizing to 'наша' and 'эра'; sets Date.current_era to False.
ERA_WORD = rule(
    eq('до'),
    or_(
        rule('н', eq('.'), 'э', eq('.').optional()),
        rule(normalized('наша'), normalized('эра'))
    )
).interpretation(
    Date.current_era.const(False)
)

DATE = or_(
    rule(DAY, '.', MONTH, '.', or_(YEAR, YEAR_SHORT), YEAR_WORD.optional()),
    rule(DAY, '.', MONTH), rule(YEAR, YEAR_WORD), rule(DAY, MONTH_NAME),
    rule(MONTH_NAME, YEAR, YEAR_WORD.optional()),
示例#5
0
# One or more PART matches enclosed by BOUND on both sides
# (BOUND and PART are defined elsewhere).
NUMERAL = rule(
    BOUND,
    PART.repeatable(),
    BOUND
)

#######
#
#   AMOUNT
#
########


def normalize_integer(value):
    """Convert a matched integer string to ``int``, ignoring separators.

    Every run of whitespace, '.' or ',' is removed before conversion, so
    '1 000,000' and '1.000.000' both give 1000000.

    Raises ValueError (from ``int``) if what remains is not a valid integer.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence
    # (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
    digits = re.sub(r'[\s.,]+', '', value)
    return int(digits)


# INT token of length 3: one thousands group.
PART = and_(
    INT,
    length_eq(3)
)

# Separator token: its value must be contained in ',.'.
SEP = in_(',.')

# An INT followed by up to two 3-character groups, either bare or each
# preceded by SEP; the matched text goes through normalize_integer into
# Money.integer.
INTEGER = or_(
    rule(INT),
    rule(INT, PART),
    rule(INT, PART, PART),
    rule(INT, SEP, PART),
    rule(INT, SEP, PART, SEP, PART)
).interpretation(
    Money.integer.custom(normalize_integer)
)

# INT token of length 1 or 2, stored in Money.fraction as an int.
FRACTION = and_(
    INT,
    or_(
        length_eq(1),
        length_eq(2)
    )
).interpretation(
    Money.fraction.custom(int)
)
#   AMOUNT
#
########


def normalize_integer(value):
    """Convert a matched integer string to ``int``, ignoring separators.

    Every run of whitespace, '.' or ',' is removed before conversion, so
    '1 000,000' and '1.000.000' both give 1000000.

    Raises ValueError (from ``int``) if what remains is not a valid integer.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence
    # (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
    digits = re.sub(r'[\s.,]+', '', value)
    return int(digits)


def normalize_fraction(value):
    """Return the fraction string as an int after right-padding to 2 chars.

    A one-character fraction is padded with a trailing '0' ('5' -> 50);
    strings of two or more characters are converted unchanged.
    """
    shortfall = 2 - len(value)
    padded = value + '0' * shortfall if shortfall > 0 else value
    return int(padded)


# INT token of length 3: one thousands group.
PART = and_(
    INT,
    length_eq(3)
)

# Separator token: its value must be contained in ',.'.
SEP = in_(',.')

# An INT followed by up to two 3-character groups, either bare or each
# preceded by SEP; the matched text goes through normalize_integer into
# Money.integer.
INTEGER = or_(
    rule(INT),
    rule(INT, PART),
    rule(INT, PART, PART),
    rule(INT, SEP, PART),
    rule(INT, SEP, PART, SEP, PART)
).interpretation(
    Money.integer.custom(normalize_integer)
)

# INT token of length 1 or 2; normalize_fraction converts it for
# Money.fraction.
FRACTION = and_(
    INT,
    or_(
        length_eq(1),
        length_eq(2)
    )
).interpretation(
    Money.fraction.custom(normalize_fraction)
)

AMOUNT = rule(
示例#7
0
########


def normalize_integer(value):
    """Convert a matched integer string to ``int``, ignoring separators.

    Every run of whitespace, '.' or ',' is removed before conversion, so
    '1 000,000' and '1.000.000' both give 1000000.

    Raises ValueError (from ``int``) if what remains is not a valid integer.
    """
    # Raw string: '\s' in a plain literal is an invalid escape sequence
    # (DeprecationWarning since 3.6, SyntaxWarning since 3.12).
    digits = re.sub(r'[\s.,]+', '', value)
    return int(digits)


def normalize_fraction(value):
    """Return the fraction string as an int after right-padding to 2 chars.

    A one-character fraction is padded with a trailing '0' ('5' -> 50);
    strings of two or more characters are converted unchanged.
    """
    shortfall = 2 - len(value)
    padded = value + '0' * shortfall if shortfall > 0 else value
    return int(padded)


# INT token of length 3: one thousands group.
PART = and_(INT, length_eq(3))

# Separator token: its value must be contained in ',.'.
SEP = in_(',.')

# An INT followed by up to two 3-character groups, either bare or each
# preceded by SEP; the matched text goes through normalize_integer into
# Money.integer.
INTEGER = or_(
    rule(INT),
    rule(INT, PART),
    rule(INT, PART, PART),
    rule(INT, SEP, PART),
    rule(INT, SEP, PART, SEP, PART)
).interpretation(Money.integer.custom(normalize_integer))

FRACTION = and_(
示例#8
0
    POSELOK_WORDS,
    POSELOK_NAME
).interpretation(
    Settlement
)


##############
#
#   ADDR PERSON
#
############


# Candidate initial: a token of length 1 that is title-cased.
ABBR = and_(length_eq(1), is_title())

# Token matching TITLE (defined elsewhere) that also carries the
# 'Name' or 'Surn' grammeme.
PART = and_(TITLE, or_(gram('Name'), gram('Surn')))

MAYBE_FIO = or_(
    rule(TITLE, PART),
    rule(PART, TITLE),
    rule(ABBR, '.', TITLE),
示例#9
0
    rule(normalized('посёлок')),
    rule(caseless('р'), DOT.optional(), caseless('п'), DOT.optional()),
    rule(normalized('рабочий'), normalized('посёлок'))).interpretation(
        Settlement.type.const('посёлок'))

# SETTLEMENT_NAME (defined elsewhere) with the match stored in
# Settlement.name.
POSELOK_NAME = SETTLEMENT_NAME.interpretation(
    Settlement.name
)

# POSELOK_WORDS followed by POSELOK_NAME, interpreted as a Settlement fact.
POSELOK = rule(
    POSELOK_WORDS,
    POSELOK_NAME
).interpretation(
    Settlement
)

##############
#
#   ADDRESS PERSON
#
############

# Candidate initial: a token of length 1 that is title-cased.
ABBR = and_(
    length_eq(1),
    is_title()
)

# Token matching TITLE (defined elsewhere) that also carries the
# 'Name' or 'Surn' grammeme.
PART = and_(
    TITLE,
    or_(
        gram('Name'),
        gram('Surn')
    )
)

# Possible person name: a TITLE/PART pair in either order, or one or two
# dotted initials placed before or after a TITLE token.
MAYBE_FIO = or_(
    rule(TITLE, PART),
    rule(PART, TITLE),
    rule(ABBR, '.', TITLE),
    rule(ABBR, '.', ABBR, '.', TITLE),
    rule(TITLE, ABBR, '.', ABBR, '.')
)

POSITION_WORDS_ = or_(
    rule(
        dictionary({
            'мичман',
            'геолог',
            'подводник',
            'краевед',
            'снайпер',
示例#10
0
from yargy.predicates import gram, is_capitalized, dictionary, is_upper, length_eq
from docx import Document
from docx.shared import Inches
import list_header as lh

# Bind the `list_header` attribute of the `list_header` module (imported as
# `lh`) to a module-level name.
list_header = lh.list_header


class CompetitionResult:
    """Plain record holding three values: FGOS, competition and result."""

    def __init__(self, FGOS, competition, result):
        # Store the constructor arguments unchanged as instance attributes.
        self.FGOS, self.competition, self.result = FGOS, competition, result


# A single upper-case token of length 2 or 3.
FGOS = rule(
    and_(
        is_upper(),
        or_(
            length_eq(2),
            length_eq(3)
        )
    )
)

# Single-token dictionary rules, one per keyword set.
IsCodeFGOS = rule(
    dictionary({'код', 'ФГОС'})
)
IsCompetitions = rule(
    dictionary({'компетенция', 'содержание'})
)
IsResults = rule(
    dictionary({'результат'})
)
TableIsCompetitionsAndResults = rule(
    dictionary({'результат', 'ФГОС', 'компетенция'})
)

# One Parser instance per rule defined above.
parser_Table = Parser(TableIsCompetitionsAndResults)
parser_FGOS = Parser(IsCodeFGOS)
parser_Result = Parser(IsResults)
parser_Competition = Parser(IsCompetitions)
parser_code_FGOS = Parser(FGOS)


# from the text
示例#11
0
#  COMPONENTS
#
###########

# Token listed in FIRST_DICT.
IS_FIRST = dictionary(FIRST_DICT)

# Token with the 'Name' grammeme that is not tagged 'Abbr'
# (cf. the initial in "А. Леонидов"), or a token from MAYBE_FIRST_DICT.
MAYBE_FIRST = or_(
    and_(gram('Name'), not_(gram('Abbr'))),
    dictionary(MAYBE_FIRST_DICT)
)

# Capitalized first-name candidate.
TITLE_FIRST = and_(
    or_(IS_FIRST, MAYBE_FIRST),
    is_capitalized()
)

# Capitalized single-character token used as a first-name initial.
TITLE_FIRST_ABBR = and_(
    length_eq(1),
    is_capitalized()
)

# Capitalized token with the 'Patr' grammeme that is not tagged 'Abbr';
# the exclusion exists because the "О" in "Фил О’Рейли" is tagged Patr.
TITLE_MIDDLE = and_(
    gram('Patr'),
    not_(gram('Abbr')),
    is_capitalized()
)

# Capitalized single-character token used as a middle-name initial.
TITLE_MIDDLE_ABBR = and_(
    length_eq(1),
    is_capitalized()
)

# Token listed in LAST_DICT.
IS_LAST = dictionary(LAST_DICT)

# Token with the 'Surn' grammeme, or a token from MAYBE_LAST_DICT.
MAYBE_LAST = or_(
    gram('Surn'),
    dictionary(MAYBE_LAST_DICT)
)

# Capitalized last-name candidate.
TITLE_LAST = and_(
    or_(IS_LAST, MAYBE_LAST),
    is_capitalized()
)

#########
示例#12
0
文件: name.py 项目: litehause/natasha
#  COMPONENTS
#
###########

# Token listed in FIRST_DICT.
IS_FIRST = dictionary(FIRST_DICT)

# Token with the 'Name' grammeme that is not tagged 'Abbr'
# (cf. the initial in "А. Леонидов"), or a token from MAYBE_FIRST_DICT.
MAYBE_FIRST = or_(
    and_(gram('Name'), not_(gram('Abbr'))),
    dictionary(MAYBE_FIRST_DICT)
)

# Title-cased first-name candidate.
TITLE_FIRST = and_(
    or_(IS_FIRST, MAYBE_FIRST),
    is_title()
)

# Title-cased single-character token used as a first-name initial.
TITLE_FIRST_ABBR = and_(
    length_eq(1),
    is_title()
)

# Title-cased token with the 'Patr' grammeme that is not tagged 'Abbr';
# the exclusion exists because the "О" in "Фил О’Рейли" is tagged Patr.
TITLE_MIDDLE = and_(
    gram('Patr'),
    not_(gram('Abbr')),
    is_title()
)

# Title-cased single-character token used as a middle-name initial.
TITLE_MIDDLE_ABBR = and_(
    length_eq(1),
    is_title()
)

# Token listed in LAST_DICT.
IS_LAST = dictionary(LAST_DICT)

# Token with the 'Surn' grammeme, or a token from MAYBE_LAST_DICT.
MAYBE_LAST = or_(
    gram('Surn'),
    dictionary(MAYBE_LAST_DICT)
)

# Title-cased last-name candidate.
TITLE_LAST = and_(
    or_(IS_LAST, MAYBE_LAST),
    is_title()
)

#########
示例#13
0
from yargy import Parser, rule, and_, or_
from yargy.predicates import gram, is_capitalized, dictionary, is_upper, length_eq
from docx import Document
from docx.shared import Inches

class CompetitionResult:
    """Plain record holding three values: FGOS, competition and result."""

    def __init__(self, FGOS, competition, result):
        # Store the constructor arguments unchanged as instance attributes.
        self.FGOS, self.competition, self.result = FGOS, competition, result

# A single upper-case token of length 2 or 3.
FGOS = rule(and_(is_upper(), or_(length_eq(2), length_eq(3))))

# Single token drawn from the {'код', 'ФГОС'} dictionary.
IsCodeFGOS = rule(dictionary({'код', 'ФГОС'}))

# Single token drawn from the {'компетенция'} dictionary.
IsCompetitions = rule(dictionary({'компетенция'}))
IsResults = rule(dictionary(
示例#14
0
文件: lector.py 项目: blackmius/mtt
    'Name',
    [attribute('first', ''),
     attribute('middle', ''),
     attribute('last', '')])

# Literal '.' token.
DOT = eq('.')

# Last name: capitalized token of type 'RU' passing length_grt(1)
# (presumably "longer than one character" -- helper defined elsewhere);
# stored in Name.last via str.capitalize.
LAST = and_(
    type('RU'),
    is_capitalized(),
    length_grt(1)
).interpretation(
    Name.last.custom(str.capitalize)
)

# First name: token with the 'Name' grammeme passing length_grt(1);
# stored in Name.first via str.capitalize.
FIRST = and_(
    gram('Name'),
    length_grt(1)
).interpretation(
    Name.first.custom(str.capitalize)
)

# Middle name: token with the 'Patr' grammeme passing length_grt(1);
# stored in Name.middle via str.capitalize.
MIDDLE = and_(
    gram('Patr'),
    length_grt(1)
).interpretation(
    Name.middle.custom(str.capitalize)
)

# Initial: a capitalized token of length 1.
ABBR = and_(
    length_eq(1),
    is_capitalized()
)

# The same initial predicate, stored upper-cased as first / middle name.
FIRST_ABBR = ABBR.interpretation(
    Name.first.custom(str.upper)
)

MIDDLE_ABBR = ABBR.interpretation(
    Name.middle.custom(str.upper)
)

# Literal strings matched case-insensitively as a whole and interpreted
# directly as a Name fact.
unsubstantial = {'Бен Режеб Т.Б.К.'}

UNSUBSTANIAL = caseless_pipeline(
    unsubstantial
).interpretation(
    Name
)

NAME = or_(
    rule(UNSUBSTANIAL),
    rule(LAST, DOT, FIRST_ABBR, DOT, MIDDLE_ABBR, DOT),
    rule(LAST, FIRST_ABBR, DOT, MIDDLE_ABBR, DOT),
    rule(FIRST_ABBR, DOT, MIDDLE_ABBR, DOT, LAST),
    rule(LAST, FIRST_ABBR, DOT),
示例#15
0
# I kept trying for a little longer, but then decided to simply do what works.

# Fact type with two attributes: 'name' and 'street_type'.
Street = fact('Street', ['name', 'street_type'])

# First, a handler for street names that contain a person's name.
# (The author jokingly calls this the wording for "back-alley academics".)
# Matches ADJF that is not APRO, or NOUN combined with GEN.
WITH_NAME = or_(and_(ADJF, not_(APRO)), and_(NOUN, GEN))

# A single character, all alone.
SINGLE = length_eq(1)

# An attempt at a universal parser based on "double standards":
# a capitalized token may be a street name without any keyword, while a
# token matched regardless of case must come with at least some keyword.
# The author later saw the tests and gave up on this idea.
PART_UPPER = and_(TITLE, or_(gram('Name'), gram('Surn')))

PART_NOCASE = or_(
    gram('Name'),