예제 #1
0
파일: brand.py 프로젝트: temamagic/natasha
class Brand(Enum):
    """Rule sequences for brand names.

    Every rule that carries an 'interpretation' routes its token to
    OrganisationObject.Attributes.Name and keeps the original spelling
    (NormalizationType.Original).
    """

    # Capitalized Latin-script token (flagged repeatable), then an
    # integer token flagged optional.
    Latin = [
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
        {
            'labels': [gram('INT')],
            'optional': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
    ]

    # Two Latin runs joined by '&' or '/'. Both ends reuse the very same
    # rule object as Latin[0].
    WithConj = [
        Latin[0],
        {
            'labels': [in_({'&', '/'})],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
        Latin[0],
    ]

    # Single token tagged with the 'Trad' grammeme.
    Trademark = [
        {
            'labels': [gram('Trad')],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
    ]
예제 #2
0
파일: event.py 프로젝트: temamagic/natasha
class Event(Enum):
    """Rule sequences for event mentions built around EVENT_TYPE_DICTIONARY."""

    # Event-type noun, an opening quote, non-quote tokens (flagged
    # repeatable), and a closing quote.
    Object = [
        {'labels': [gram('NOUN'), dictionary(EVENT_TYPE_DICTIONARY)]},
        {'labels': [gram('QUOTE')]},
        {'labels': [gram_not('QUOTE')], 'repeatable': True},
        {'labels': [gram('QUOTE')]},
    ]

    # Adjective(s) followed by an event-type word,
    # e.g. "Московский международный форум" (Moscow International Forum).
    AdjWithDescriptor = [
        {'labels': [gram('ADJF')]},
        {
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
        },
        {
            'labels': [
                dictionary(EVENT_TYPE_DICTIONARY),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
        },
    ]
예제 #3
0
class ProbabilisticOrganisation(Enum):
    """Looser organisation rules assembled from Organisation members and shared rule lists."""

    # Capitalized name inside quotes,
    # e.g. '"Коммерсантъ" сообщил ...' ("Kommersant" reported ...).
    # The two quote rules carry 'skip': True and no interpretation.
    NounQuoted = [
        {'labels': [gram('QUOTE')], 'skip': True},
        {
            'labels': [
                is_capitalized(True),
                gram_any({'NOUN', 'ADJF', 'LATN'}),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {'labels': [gram('QUOTE')], 'skip': True},
    ]

    # <Organisation rule> + initials prefix + probabilistic initials rule.
    EducationalWithInitials = (
        Organisation.Educational.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    SocialWithInitials = (
        Organisation.Social.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    AdjSocialWithInitials = (
        Organisation.AdjSocial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    AdjCommercialWithInitials = (
        Organisation.AdjCommercial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )

    # <Organisation rule> + initials prefix + possible-lastname rule.
    EducationalWithLastname = (
        Organisation.Educational.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    SocialWithLastname = (
        Organisation.Social.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    AdjSocialWithLastname = (
        Organisation.AdjSocial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    AdjCommercialWithLastname = (
        Organisation.AdjCommercial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
예제 #4
0
class Date(Enum):
    """Rule sequences for dates, composed from the shared *_GRAMMAR rule dicts."""

    # day, month word, year
    Full = [DAY_GRAMMAR, MONTH_GRAMMAR, YEAR_GRAMMAR]

    # day, month given as an integer 1..12, year; the punctuation rules
    # between the parts are flagged optional
    FullWithDigits = [
        DAY_GRAMMAR,
        {'labels': [gram('PUNCT')], 'optional': True},
        {'labels': [gram('INT'), gte(1), lte(12)]},
        {'labels': [gram('PUNCT')], 'optional': True},
        YEAR_GRAMMAR,
    ]

    DayAndMonth = [DAY_GRAMMAR, MONTH_GRAMMAR]

    Year = [YEAR_GRAMMAR, YEAR_SUFFIX_GRAMMAR]

    PartialYearObject = [
        PARTIAL_DATE_GRAMMAR,
        YEAR_GRAMMAR,
        YEAR_SUFFIX_GRAMMAR,
    ]

    PartialMonthObject = [PARTIAL_DATE_GRAMMAR, MONTH_GRAMMAR]

    # integer-range token followed by a month word
    DayRange = [
        {'labels': [gram('INT-RANGE')]},
        MONTH_GRAMMAR,
    ]

    # integer-range token followed by the word "год" (year)
    YearRange = [
        {'labels': [gram('INT-RANGE')]},
        {'labels': [dictionary({'год'})]},
    ]

    Month = [MONTH_GRAMMAR]

    DayOfWeek = [DAY_OF_WEEK_GRAMMAR]

    MonthWithOffset = [
        DATE_OFFSET_PREFIX_GRAMMAR,
        MONTH_WITH_GNC_MATCHING_GRAMMAR,
    ]

    DayOfWeekWithOffset = [
        DATE_OFFSET_PREFIX_GRAMMAR,
        DAY_OF_WEEK_WITH_GNC_MATCHING_GRAMMAR,
    ]

    # offset word followed by the word "месяц" (month)
    CurrentMonthWithOffset = [
        DATE_OFFSET_PREFIX_GRAMMAR,
        {
            'labels': [
                dictionary({'месяц'}),
                gnc_match(-1, solve_disambiguation=True),
            ],
        },
    ]
예제 #5
0
}

# Offset words: "следующий" (next) and "прошлый" (previous / last).
DATE_OFFSET_PREFIX_DICTIONARY = {'следующий', 'прошлый'}

# Single rule: token found in the offset-word dictionary.
DATE_OFFSET_PREFIX_GRAMMAR = {
    'labels': [dictionary(DATE_OFFSET_PREFIX_DICTIONARY)],
}

# Single rule: integer token bounded to 1..31 via gte/lte.
DAY_GRAMMAR = {
    'labels': [gram('INT'), gte(1), lte(31)],
}

# Single rule: token found in MONTH_DICTIONARY.
MONTH_GRAMMAR = {
    'labels': [dictionary(MONTH_DICTIONARY)],
}

MONTH_WITH_GNC_MATCHING_GRAMMAR = {
    'labels': [
        dictionary(MONTH_DICTIONARY),
        gnc_match(-1, solve_disambiguation=True),
예제 #6
0
class Location(Enum):
    """Rule sequences for geographic names.

    Each member is a list of rule dicts. A rule's 'interpretation' sends
    the token to LocationObject.Attributes.Name or .Descriptor. How the
    engine treats 'optional', 'repeatable' and the gnc_match offsets is
    defined outside this class.
    """

    # FEDERAL_DISTRICT_DICTIONARY adjective + "федеральный" + "округ".
    FederalDistrict = [
        {
            'labels': [gram('ADJF'), dictionary(FEDERAL_DISTRICT_DICTIONARY)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'федеральный'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'округ'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # Same leading adjective, followed by the literal token "ФО".
    FederalDistrictAbbr = [
        {
            'labels': [gram('ADJF'), dictionary(FEDERAL_DISTRICT_DICTIONARY)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [eq('ФО')],
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # Any adjective + "автономный" + "округ".
    AutonomousDistrict = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'автономный'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'округ'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # Any adjective followed by the literal token "АО".
    AutonomousDistrictAbbr = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [eq('АО')],
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # Any adjective followed by a word from REGION_TYPE_DICTIONARY.
    Region = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                dictionary(REGION_TYPE_DICTIONARY),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # COMPLEX_OBJECT_PREFIX_DICTIONARY adjective + geographic noun;
    # both tokens go to Name.
    ComplexObject = [
        {
            'labels': [
                gram('ADJF'),
                dictionary(COMPLEX_OBJECT_PREFIX_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gram('NOUN'),
                gram('Geox'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
    ]

    # PARTIAL_OBJECT_PREFIX_DICTIONARY noun + geographic noun;
    # both tokens go to Name.
    PartialObject = [
        {
            'labels': [
                gram('NOUN'),
                dictionary(PARTIAL_OBJECT_PREFIX_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gram('NOUN'),
                gram('Geox'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
    ]

    # Capitalized adjective(s) + federation / republic / empire,
    # e.g. "Донецкая народная республика", "Российская Федерация".
    AdjfFederation = [
        {
            'labels': [gram('ADJF'), is_capitalized(True)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gnc_match(0, solve_disambiguation=True),
                dictionary({'федерация', 'республика', 'империя'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Descriptor},
        },
    ]

    # 'Adjx' token(s) + "штат" / "эмират" + a genitive token flagged
    # optional, e.g. "Соединенные Штаты", "Соединенные Штаты Америки".
    # Every token here, including the state/emirate word, goes to Name.
    AdjxFederation = [
        {
            'labels': [gram('Adjx'), is_capitalized(True)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gram('Adjx'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [
                gnc_match(0, solve_disambiguation=True),
                dictionary({'штат', 'эмират'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
        {
            'labels': [gram('gent')],
            'optional': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
    ]

    # Single capitalized token tagged 'Geox'.
    Object = [
        {
            'labels': [is_capitalized(True), gram('Geox')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {'attribute': LocationObject.Attributes.Name},
        },
    ]
예제 #7
0
class Address(Enum):
    """Rule sequences for street addresses.

    Members are lists of rule dicts. The first few are written out in
    full; the rest are assembled by slicing and concatenating earlier
    members with module-level rule lists (SHORT_STREET_DESCRIPTOR_RULE,
    NUMERIC_STREET_PART_RULE, OPTIONAL_COMMA_GRAMMAR, HOUSE_NUMBER_GRAMMAR,
    HOUSE_LETTER_GRAMMAR). Because of that, the definition order and the
    slice indices below are load-bearing.
    """

    # Adjective name followed by the full descriptor, e.g. "Садовая улица"
    AdjFull = [
        {
            'labels': [
                gram('ADJF'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable':
            True,
            'optional':
            True,
            'normalization':
            NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                dictionary(STREET_DESCRIPTOR_DICTIONARY),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization':
            NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Descriptor,
            },
        },
    ]

    # Full descriptor followed by the adjective name, e.g. "улица Садовая"
    AdjFullReversed = [
        {
            'labels': [
                dictionary(STREET_DESCRIPTOR_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Descriptor,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable':
            True,
            'normalization':
            NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
    ]

    # Short descriptor + adjective name, e.g. "ул. Садовая".
    # AdjFull[:2] is the two adjective rules without the trailing descriptor.
    AdjShort = SHORT_STREET_DESCRIPTOR_RULE + AdjFull[:2]

    # Adjective name + short descriptor, e.g. "Садовая ул."
    AdjShortReversed = AdjFull[:2] + SHORT_STREET_DESCRIPTOR_RULE

    # Full descriptor + adjective(s) + genitive word(s),
    # e.g. "улица Красных Десантников"
    AdjNounFull = [AdjFullReversed[0]] + AdjFull[:2] + [{
        'labels': [
            gram('gent'),
            gram_not('Abbr'),
            gnc_match(-1, solve_disambiguation=True),
        ],
        'repeatable':
        True,
        'normalization':
        NormalizationType.Inflected,
        'interpretation': {
            'attribute': AddressObject.Attributes.Street_Name,
        },
    }]

    # Short-descriptor variant of AdjNounFull, e.g. "ул. Красных Десантников".
    # AdjNounFull[-1] is the genitive rule defined just above.
    AdjNounShort = AdjShort + [AdjNounFull[-1]]

    # Full descriptor + genitive name, e.g. "улица Карла Маркса"
    GentFullReversed = [
        AdjFullReversed[0],
        {
            'labels': [
                gram('gent'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                gram('gent'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True)
            ],
            'optional':
            True,
            'repeatable':
            True,
            'normalization':
            NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
    ]

    # Full descriptor + abbreviated initial + "." + genitive name,
    # e.g. "улица К. Маркса"
    GentFullReversedWithShortcut = [
        GentFullReversed[0],
        {
            'labels': [
                gram('Abbr'),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                eq('.'),
            ],
            'normalization': NormalizationType.Original,
        },
    ] + GentFullReversed[1:]

    # Two initials before the name, e.g. "улица В. В. Ленина".
    # [:3] is descriptor + initial + "."; [1:3] repeats initial + ".";
    # [3:] is the genitive name rules.
    GentFullReversedWithExtendedShortcut = GentFullReversedWithShortcut[:3] + GentFullReversedWithShortcut[
        1:3] + GentFullReversedWithShortcut[3:]

    # Short descriptor + genitive name, e.g. "пр. Маршала жукова"
    GentShortReversed = SHORT_STREET_DESCRIPTOR_RULE + GentFullReversed[1:]

    # Short descriptor + initial + genitive name, e.g. "пр. К. Маркса"
    GentShortReversedWithShortcut = SHORT_STREET_DESCRIPTOR_RULE + GentFullReversedWithShortcut[
        1:]

    # Short descriptor + two initials + genitive name, e.g. "пл. В. В. Ленина"
    GentShortReversedWithExtendedShortcut = SHORT_STREET_DESCRIPTOR_RULE + GentFullReversedWithExtendedShortcut[
        1:]

    # Genitive name + full descriptor, e.g. "Николая Ершова улица"
    GentFull = GentFullReversed[1:] + GentFullReversed[:1]

    # Genitive name + short descriptor, e.g. "Обуховской Обороны пр-кт".
    # NOTE(review): the [2:] slice assumes SHORT_STREET_DESCRIPTOR_RULE
    # contributes exactly two rules — confirm against its definition.
    GentShort = GentShortReversed[2:] + SHORT_STREET_DESCRIPTOR_RULE

    # Numeric part + adjective name + full descriptor,
    # e.g. "1-я новорублевская улица"
    AdjFullWithNumericPart = NUMERIC_STREET_PART_RULE + AdjFull

    # Full descriptor + numeric part + adjective name,
    # e.g. "улица 1-я новорублевская".
    # [:-1] drops the trailing descriptor rule inherited from AdjFull.
    AdjFullReversedWithNumericPart = AdjFullReversed[:
                                                     1] + AdjFullWithNumericPart[:
                                                                                 -1]

    # Numeric part + adjective name + short descriptor,
    # e.g. "1-я новорублевская ул."
    AdjShortWithNumericPart = AdjFullWithNumericPart[:-1] + SHORT_STREET_DESCRIPTOR_RULE

    # Short descriptor + numeric part + adjective name,
    # e.g. "ул. 1-я промышленная"
    AdjShortReversedWithNumericPart = SHORT_STREET_DESCRIPTOR_RULE + AdjFullWithNumericPart[:
                                                                                            -1]

    # Full descriptor + bare number + genitive words,
    # e.g. "проспект 50 лет октября".
    # GentFullReversed[1:2] adds one extra genitive rule ahead of the
    # usual genitive name rules.
    GentFullReversedWithNumericPrefix = GentFullReversed[:1] + NUMERIC_STREET_PART_WITHOUT_SUFFIX_RULE + GentFullReversed[
        1:2] + GentFullReversed[1:]

    # Short-descriptor variant, e.g. "пр-т. 50 лет советской власти".
    # NOTE(review): the [:2] slice assumes SHORT_STREET_DESCRIPTOR_RULE
    # contributes exactly two rules — confirm against its definition.
    GentShortReversedWithNumericPrefix = GentShortReversed[:2] + NUMERIC_STREET_PART_WITHOUT_SUFFIX_RULE + GentFullReversed[
        1:2] + GentFullReversed[1:]

    # Numeric part + full descriptor + genitive name,
    # e.g. "2-ой проезд Перова Поля"
    GentNumericSplittedByFullDescriptor = NUMERIC_STREET_PART_RULE + GentFullReversed

    # Numeric part + short descriptor + genitive name,
    # e.g. "7-я ул. текстильщиков"
    GentNumericSplittedByShortDescriptor = NUMERIC_STREET_PART_RULE + GentShortReversed
    '''
    Street names with house numbers
    '''
    # The string above is a bare expression used as a section marker.
    # Every member below is <street member> + OPTIONAL_COMMA_GRAMMAR +
    # HOUSE_NUMBER_GRAMMAR.

    # e.g. "Зеленая улица, дом 7"
    AdjFullWithHn = AdjFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица Зеленая, дом 7"
    AdjFullReversedWithHn = AdjFullReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "ул. Нижняя Красносельская дом 7"
    AdjShortWithHn = AdjShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "Настасьинский пер., дом 2"
    AdjShortReversedWithHn = AdjShortReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица Красной Гвардии, дом 2"
    AdjNounFullWithHn = AdjNounFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "ул. Брянской пролетарской дивизии дом 2"
    AdjNounShortWithHn = AdjNounShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "Николая Ершова улица дом 1"
    GentFullWithHn = GentFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица Карла Маркса дом 1"
    GentFullReversedWithHn = GentFullReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица К. Маркса, дом 1"
    GentFullReversedWithShortcutWithHn = GentFullReversedWithShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица В. И. Ленина, дом 1"
    GentFullReversedWithExtendedShortcutWithHn = GentFullReversedWithExtendedShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "Обуховской Обороны пр-кт дом 1"
    GentShortWithHn = GentShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "пр-кт Обуховской Обороны дом 1"
    GentShortReversedWithHn = GentShortReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "ул. К. Маркса, дом 1"
    GentShortReversedWithShortcutWithHn = GentShortReversedWithShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "ул. В. И. Ленина, дом 1"
    GentShortReversedWithExtendedShortcutWithHn = GentShortReversedWithExtendedShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "1-я новорублевская улица дом 1"
    AdjFullWithNumericPartWithHn = AdjFullWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "улица 1-я новорублевская, дом 1"
    AdjFullReversedWithNumericPartWithHn = AdjFullReversedWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "1-я новорублевская ул. дом 1"
    AdjShortWithNumericPartWithHn = AdjShortWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "ул. 1-я промышленная, дом 1"
    AdjShortReversedWithNumericPartWithHn = AdjShortReversedWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "проспект 50 лет октября, дом 1"
    GentFullReversedWithNumericPrefixWithHn = GentFullReversedWithNumericPrefix + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "пр-т. 50 лет советской власти, дом 1"
    GentShortReversedWithNumericPrefixWithHn = GentShortReversedWithNumericPrefix + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "2-ой проезд Перова Поля, дом 1"
    GentNumericSplittedByFullDescriptorWithHn = GentNumericSplittedByFullDescriptor + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR

    # e.g. "7-я ул. текстильщиков, дом 1"
    GentNumericSplittedByShortDescriptorWithHn = GentNumericSplittedByShortDescriptor + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR
    '''
    Street names with house numbers and letters
    '''
    # The string above is a bare expression used as a section marker.
    # Every member below is <street member> + OPTIONAL_COMMA_GRAMMAR +
    # HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR. Most examples quote
    # only the street and house-number part; the house-letter part
    # follows it, as in the first three examples.

    # e.g. "Зеленая улица, дом 7, лит А"
    AdjFullWithHnAndLetter = AdjFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # e.g. "улица Зеленая, дом 7, лит А"
    AdjFullReversedWithHnAndLetter = AdjFullReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # e.g. "ул. Нижняя Красносельская дом 7, лит А"
    AdjShortWithHnAndLetter = AdjShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "Настасьинский пер., дом 2", then the house letter
    AdjShortReversedWithHnAndLetter = AdjShortReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "улица Красной Гвардии, дом 2", then the house letter
    AdjNounFullWithHnAndLetter = AdjNounFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "ул. Брянской пролетарской дивизии дом 2", then the house letter
    AdjNounShortWithHnAndLetter = AdjNounShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "Николая Ершова улица дом 1", then the house letter
    GentFullWithHnAndLetter = GentFull + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "улица Карла Маркса дом 1", then the house letter
    GentFullReversedWithHnAndLetter = GentFullReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "улица К. Маркса, дом 1", then the house letter
    GentFullReversedWithShortcutWithHnAndLetter = GentFullReversedWithShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "улица В. И. Ленина, дом 1", then the house letter
    GentFullReversedWithExtendedShortcutWithHnAndLetter = GentFullReversedWithExtendedShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "Обуховской Обороны пр-кт дом 1", then the house letter
    GentShortWithHnAndLetter = GentShort + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "пр-кт Обуховской Обороны дом 1", then the house letter
    GentShortReversedWithHnAndLetter = GentShortReversed + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "ул. К. Маркса, дом 1", then the house letter
    GentShortReversedWithShortcutWithHnAndLetter = GentShortReversedWithShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "ул. В. И. Ленина, дом 1", then the house letter
    GentShortReversedWithExtendedShortcutWithHnAndLetter = GentShortReversedWithExtendedShortcut + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "1-я новорублевская улица дом 1", then the house letter
    AdjFullWithNumericPartWithHnAndLetter = AdjFullWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "улица 1-я новорублевская, дом 1", then the house letter
    AdjFullReversedWithNumericPartWithHnAndLetter = AdjFullReversedWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "1-я новорублевская ул. дом 1", then the house letter
    AdjShortWithNumericPartWithHnAndLetter = AdjShortWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "ул. 1-я промышленная, дом 1", then the house letter
    AdjShortReversedWithNumericPartWithHnAndLetter = AdjShortReversedWithNumericPart + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "проспект 50 лет октября, дом 1", then the house letter
    GentFullReversedWithNumericPrefixWithHnAndLetter = GentFullReversedWithNumericPrefix + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "пр-т. 50 лет советской власти, дом 1", then the house letter
    GentShortReversedWithNumericPrefixWithHnAndLetter = GentShortReversedWithNumericPrefix + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "2-ой проезд Перова Поля, дом 1", then the house letter
    GentNumericSplittedByFullDescriptorWithHnAndLetter = GentNumericSplittedByFullDescriptor + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR

    # street part e.g. "7-я ул. текстильщиков, дом 1", then the house letter
    GentNumericSplittedByShortDescriptorWithHnAndLetter = GentNumericSplittedByShortDescriptor + OPTIONAL_COMMA_GRAMMAR + HOUSE_NUMBER_GRAMMAR + HOUSE_LETTER_GRAMMAR
예제 #8
0
            'attribute': AddressObject.Attributes.Street_Descriptor,
        },
    },
    {
        'labels': [
            eq('.'),
        ],
        'optional': True,
        'normalization': NormalizationType.Original,
    }
]

NUMERIC_STREET_PART_RULE = [  # 1-я, 10-й, 100500-ой и т.д.
    {
        'labels': [
            gram('INT'),
            gte(1),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': AddressObject.Attributes.Street_Name,
        },
    }, {
        'labels': [
            eq('-'),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': AddressObject.Attributes.Street_Name,
        },
    }, {
예제 #9
0
            'attribute': AddressObject.Attributes.Street_Descriptor,
        },
    },
    {
        'labels': [
            eq('.'),
        ],
        'optional': True,
        'normalization': NormalizationType.Original,
    }
]

NUMERIC_STREET_PART_RULE = [  # 1-я, 10-й, 100500-ой и т.д.
    {
        'labels': [
            gram('INT'),
            gte(1),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': AddressObject.Attributes.Street_Name,
        },
    }, {
        'labels': [
            eq('-'),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': AddressObject.Attributes.Street_Name,
        },
    }, {
예제 #10
0
# Token found in PREFIX_DICTIONARY.
PREFIX_GRAMMAR = {'labels': [dictionary(PREFIX_DICTIONARY)]}

# Token found in CURRENCY_DICTIONARY.
CURRENCY_GRAMMAR = {'labels': [dictionary(CURRENCY_DICTIONARY)]}

# Punctuation token; the rule is flagged optional.
OPTIONAL_PUNCT_GRAMMAR = {'labels': [gram('PUNCT')], 'optional': True}

# Token tagged 'NUMBER'.
NUMBER_GRAMMAR = {'labels': [gram('NUMBER')]}

# Token tagged 'NUMR'; the rule is flagged repeatable.
HAND_WRITTEN_NUMBER_GRAMMAR = {'labels': [gram('NUMR')], 'repeatable': True}
예제 #11
0
class ProbabilisticPerson(Enum):
    '''
    These grammars match words that look like (but may not be) person names.
    Not included in natasha DEFAULT_GRAMMARS, but shows good results on the
    factRuEval-16 test set.
    '''

    # First rule of Person.Firstname followed by a possible lastname.
    FirstnameAndLastname = [Person.Firstname.value[0], POSSIBLE_LASTNAME_GRAMMAR]

    # First four rules of Person.InitialsAndLastname, then a possible lastname.
    InitialsAndLastname = (
        Person.InitialsAndLastname.value[:4] + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # Same pieces in the opposite order.
    LastnameAndInitials = (
        [POSSIBLE_LASTNAME_GRAMMAR] + Person.InitialsAndLastname.value[:4]
    )

    # Only the first two rules of Person.InitialsAndLastname are used here.
    FirstnameAsInitialsAndLastname = (
        Person.InitialsAndLastname.value[:2] + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    LastnameAndfirstnameAsInitials = (
        [POSSIBLE_LASTNAME_GRAMMAR] + Person.InitialsAndLastname.value[:2]
    )

    # First name, middle name as an initial, lastname,
    # e.g. "Джон Х. Доу" (John H. Doe).
    FirstnameAndMiddlenameAsInitialsWithLastname = (
        FirstnameAndLastname[:1]
        + Person.InitialsAndLastname.value[2:4]
        + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # First name, a word from NAME_NOBILITY_PARTICLE_DICTIONARY (stored
    # under Lastname), then a possible lastname.
    FirstnameAndLastnameWithNobilityParticle = [
        Person.Firstname.value[0],
        {
            'labels': [dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY)],
            'normalization': NormalizationType.Original,
            'interpretation': {'attribute': PersonObject.Attributes.Lastname},
        },
        POSSIBLE_LASTNAME_GRAMMAR,
    ]

    # Person.WithPosition without its final rule, closed by the
    # possible-lastname rule taken from FirstnameAndLastname.
    FirstnameAndLastnameWithPosition = (
        Person.WithPosition.value[:-1] + [FirstnameAndLastname[-1]]
    )

    # First name, quoted nickname, lastname,
    # e.g. "Эрнесто «Че» Гевара" (Ernesto "Che" Guevara).
    FirstnameAndLastnameWithQuotedNickname = (
        Person.Firstname.value[:1]
        + Person.FirstnameAndLastnameWithQuotedNickname.value[1:-1]
        + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # Initials, nobility particle, lastname,
    # e.g. "С.П. фон Дервиз" (S.P. von Derviz).
    InitialsAndLastnameWithNobilityParticle = InitialsAndLastname[:4] + [
        FirstnameAndLastnameWithNobilityParticle[1],
        POSSIBLE_LASTNAME_GRAMMAR,
    ]

    # Latin-script first name, middle token, ".", lastname,
    # e.g. "John S. Doe".
    Latin = [
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {'attribute': PersonObject.Attributes.Firstname},
        },
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {'attribute': PersonObject.Attributes.Middlename},
        },
        {'labels': [gram('PUNCT'), eq('.')]},
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {'attribute': PersonObject.Attributes.Lastname},
        },
    ]
예제 #12
0
class Person(Enum):
    """Token-pattern grammars for person names.

    Every member's value is a list of rule dicts, one dict per position in
    the pattern. The keys used in this class are:

    * ``'labels'`` -- list of label objects built by helpers such as
      ``gram``, ``gram_not``, ``gnc_match``, ``eq`` (defined elsewhere);
    * ``'optional'`` / ``'repeatable'`` -- boolean flags on a position;
    * ``'normalization'`` -- a ``NormalizationType`` value;
    * ``'interpretation'`` -- maps the position to a ``PersonObject``
      attribute.

    Several members are assembled by slicing members defined above them, so
    the definition order inside the class body matters.
    """

    # Surname, first name, patronymic, e.g. "Иванов Иван Иванович".
    Full = [
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # First name, patronymic, surname, e.g. "Иван Иванович Иванов".
    FullReversed = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # First name, one or more middle names, surname,
    # e.g. "Фелипе Родригес Фернандес".
    # https://www.englishelp.ru/business-english/other/284-patronymic-vs-middle-name.html
    FullReversedWithLatinMiddlename = [
        {
            'labels': [
                gram('Name'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            # Middle names are tagged 'Name' too; the position may repeat.
            'labels': [
                gram('Name'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # Two initials and a surname, e.g. "Л. А. Раневская".
    # Layout (relied upon by the slices further down):
    #   [0] first initial, [1] '.', [2] middle initial, [3] '.', [4] surname.
    InitialsAndLastname = [
        {
            'labels': [
                gram_in(['Name', 'Abbr']),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('PUNCT'),
                eq('.'),
            ],
        },
        {
            'labels': [
                gram_in(['Patr', 'Abbr']),
                gnc_match(0, solve_disambiguation=True),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('PUNCT'),
                eq('.'),
            ],
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # First name and surname, e.g. "Иван Иванов".
    FirstnameAndLastname = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # First name, quoted nickname, surname.
    # 'labels' are lists here (they used to be set literals) so that they
    # have a defined order and the same type as in every other rule.
    FirstnameAndLastnameWithQuotedNickname = [
        FirstnameAndLastname[0],
        {
            # Opening quote.
            'labels': [
                gram('QUOTE'),
                gram_any({
                    'G-QUOTE',
                    'L-QUOTE',
                }),
            ],
            'normalization': NormalizationType.Original,
        },
        {
            # The nickname itself: anything that is not a quote or punctuation.
            'labels': [
                gram_not_in({
                    'QUOTE',
                    'PUNCT',
                }),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Nickname,
            },
        },
        {
            # Closing quote.
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
        InitialsAndLastname[-1],
    ]

    # Surname and first name, e.g. "Иванов Иван".
    LastnameAndFirstname = [
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
    ]

    # First name, middle initial, surname, e.g. "Александр Ф. Скляр".
    # The slice starts at index 2 (the middle-initial rule). Starting at 3
    # dropped the initial and left the pattern "name, '.', surname".
    FullReversedWithMiddlenameAsInitials = (
        FullReversed[:1] + InitialsAndLastname[2:]
    )

    # Surname followed by two initials, e.g. "Раневская Л. А."
    LastnameAndInitials = [
        LastnameAndFirstname[0],
    ] + InitialsAndLastname[:4]

    # Surname followed by a single initial, e.g. "Раневская Л."
    LastnameAndFirstnameAsInitials = [
        LastnameAndFirstname[0],
    ] + InitialsAndLastname[:2]

    # Single initial followed by the surname, e.g. "Л. Раневская".
    FirstnameAsInitialsAndLastname = InitialsAndLastname[:2] + [
        InitialsAndLastname[-1],
    ]

    # First name and patronymic, e.g. "Иван Иванович".
    FirstnameAndMiddlename = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # Stand-alone surname, e.g. "Иванов".
    Lastname = [
        {
            'labels': [
                gram('Surn'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # Stand-alone patronymic, e.g. "Иванович".
    Middlename = [
        {
            'labels': [
                gram('Patr'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # Stand-alone first name, e.g. "Иван".
    Firstname = [
        {
            'labels': [
                gram('Name'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
    ]

    # First name, nobility particle, surname, e.g. "Отто фон Бисмарк".
    FirstnameAndLastnameWithNobilityParticle = [
        FullReversed[0],
        {
            # The particle is stored under Lastname, like the surname itself.
            'labels': [
                dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # Position followed by a name, e.g. "Премьер-министр РФ Дмитрий Медведев".
    # Layout: [0] position, [1] optional qualifier words, [2] first name,
    # [3] optional patronymic, [4] surname.
    WithPosition = [
        {
            'labels': [
                gram('Person/Position'),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Descriptor,
            },
        },
        {
            # Words between the position and the name: either
            # (one of ablt/loct/gent, or 'Fixd') and not a name part,
            # or an 'Abbr' token, or a 'LATN' token.
            'labels': [
                or_((
                    and_((
                        or_((
                            gram_any({
                                'ablt',
                                'loct',
                                'gent',
                            }),
                            gram('Fixd'),
                        )),
                        gram_not_in({
                            'Name',
                            'Patr',
                            'Surn',
                        }),
                    )),
                    gram('Abbr'),
                    gram('LATN'),
                )),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('Name'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # Position, quoted organisation name, person name,
    # e.g. "Пресс-секретарь «Роснефти» Михаил Леонтьев".
    # The quoted block is inserted between WithPosition[:2] and the name rules.
    WithPositionAndQuotedOrganisationName = (WithPosition[:2] + [
        {
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram_not('QUOTE'),
                # Pass a collection, as every other gram_not_in call does.
                # NOTE(review): a bare string would presumably be iterated
                # character by character by the helper -- confirm.
                gram_not_in({
                    'END-OF-LINE',
                }),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
    ] + WithPosition[2:])

    # Title, nobility particle, surname, e.g. "граф де Кристо".
    # Only the surname carries an interpretation.
    PositionAndNobilitySurname = [
        {
            'labels': [
                gram('Person/Position'),
            ],
        },
        {
            'labels': [
                dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY),
            ],
        },
        {
            'labels': [
                gram('Surn'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # Name with an ordinal or Roman numeral, e.g. "Генрих Восьмой", "Карл XII".
    NameWithNumericPart = [
        {
            'labels': [
                gram_in({
                    'Name',
                    'sing',
                }),
                gram_not('Abbr'),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            # Either an ADJF/Anum token matching the previous token, or a
            # 'ROMN' token. This position has no interpretation.
            'labels': [
                or_((
                    and_((
                        gram_in({
                            'ADJF',
                            'Anum',
                        }),
                        gnc_match(-1, solve_disambiguation=True),
                    )),
                    gram('ROMN'),
                )),
            ],
        },
    ]
예제 #13
0
class Organisation(Enum):
    """Token-pattern grammars for organisation names.

    Every member's value is a list of rule dicts, one dict per position in
    the pattern. Positions are mapped to ``OrganisationObject`` attributes
    (``Descriptor`` or ``Name``) through the ``'interpretation'`` key.
    The ``...WithInitials`` / ``...WithLastname`` members append the
    module-level ``NAMED_ORG_INITIALS_PREFIX_RULE`` plus either
    ``NAMED_ORG_INITIALS_RULE`` or ``LASTNAME_GRAMMAR`` to a base pattern.
    """

    # Descriptor, optional abbreviations, then a name in quotes.
    OfficialQuoted = [
        {
            'labels': [
                or_((
                    gram('Orgn/Commercial'),
                    gram('Orgn/Social'),
                    gram('Orgn/Abbr'),
                )),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            'labels': [
                is_abbr(True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            # Opening quote.
            'labels': [
                gram('QUOTE'),
            ],
        },
        {
            # Quoted content: anything except a quote, an end-of-line token
            # or a '.'.
            'labels': [
                gram_not('QUOTE'),
                # Pass a collection, as every other gram_not_in call does.
                # NOTE(review): a bare string would presumably be iterated
                # character by character by the helper -- confirm.
                gram_not_in({
                    'END-OF-LINE',
                }),
                not_eq('.'),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Closing quote.
            'labels': [
                gram('QUOTE'),
            ],
        },
    ]

    # Single token tagged 'Abbr' and 'Orgn' but not 'Orgn/Abbr'.
    Abbr = [
        {
            'labels': [
                gram('Abbr'),
                gram('Orgn'),
                gram_not('Orgn/Abbr'),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
    ]

    # The literal token 'ИП' followed by the three Person.Full rules.
    IndividualEntrepreneur = [
        {
            'labels': [
                eq('ИП'),
            ],
            'normalization': NormalizationType.Original,
        },
        Person.Full.value[0],
        Person.Full.value[1],
        Person.Full.value[2],
    ]

    # Commercial descriptor followed by Latin-script and/or number tokens.
    SimpleLatin = [
        {
            'labels': [
                gram('Orgn/Commercial'),
            ],
            'normalization': NormalizationType.Normalized,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            'labels': [
                gram_any({
                    'LATN',
                    'NUMBER',
                }),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
    ]

    # Adjectives plus an educational descriptor,
    # e.g. "Санкт-Петербургский Государственный университет".
    # Layout: [0] capitalized adjective, [1] optional further adjectives,
    # [2] descriptor, [3] optional trailing words.
    Educational = [
        {
            'labels': [
                gram('ADJF'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('Orgn/Educational'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Normalized,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            # Trailing ablt/gent words that are not prepositions; the
            # dictionary_not label is built from the word 'имя'.
            'labels': [
                gram_any({
                    'ablt',
                    'gent',
                }),
                gram_not_in({
                    'PREP',
                }),
                dictionary_not({
                    'имя',
                }),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
            'optional': True,
            'repeatable': True,
        },
    ]

    # e.g. "Публичная библиотека имени М. Е. Салтыкова-Щедрина"
    EducationalWithInitials = (
        Educational + NAMED_ORG_INITIALS_PREFIX_RULE + NAMED_ORG_INITIALS_RULE
    )
    # e.g. "Публичная библиотека имени Салтыкова-Щедрина"
    EducationalWithLastname = (
        Educational + NAMED_ORG_INITIALS_PREFIX_RULE + LASTNAME_GRAMMAR
    )

    # Adjectives plus a commercial descriptor, e.g. "Кировский завод".
    # Same shape as Educational with the descriptor rule swapped.
    AdjCommercial = Educational[:2] + [
        {
            'labels': [
                gram('Orgn/Commercial'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Normalized,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        Educational[-1],
    ]

    AdjCommercialWithInitials = (
        AdjCommercial + NAMED_ORG_INITIALS_PREFIX_RULE + NAMED_ORG_INITIALS_RULE
    )
    AdjCommercialWithLastname = (
        AdjCommercial + NAMED_ORG_INITIALS_PREFIX_RULE + LASTNAME_GRAMMAR
    )

    # Social descriptor followed by its name,
    # e.g. "Общества андрологии и сексуальной медицины".
    Social = [
        {
            'labels': [
                gram('Orgn/Social'),
                gram('sing'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            'labels': [
                gram_not_in({
                    'PREP',
                    'CONJ',
                }),
                gram_any({
                    'accs',
                    'datv',
                    'gent',
                }),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Optional continuation of the name; person-name tokens
            # (Name/Patr/Surn) and prepositions are excluded.
            'labels': [
                gram_any({
                    'gent',
                    'accs',
                    'ablt',
                }),
                gram_not_in({
                    'PREP',
                    'Name',
                    'Patr',
                    'Surn',
                }),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
    ]

    SocialWithInitials = (
        Social + NAMED_ORG_INITIALS_PREFIX_RULE + NAMED_ORG_INITIALS_RULE
    )
    SocialWithLastname = (
        Social + NAMED_ORG_INITIALS_PREFIX_RULE + LASTNAME_GRAMMAR
    )

    # Adjectives, a social descriptor, then the optional Social tail rule.
    AdjSocial = [
        {
            'labels': [
                gram('ADJF'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Note: this descriptor rule sets no 'normalization' key.
            'labels': [
                gram('Orgn/Social'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        Social[-1],
    ]

    AdjSocialWithInitials = (
        AdjSocial + NAMED_ORG_INITIALS_PREFIX_RULE + NAMED_ORG_INITIALS_RULE
    )
    AdjSocialWithLastname = (
        AdjSocial + NAMED_ORG_INITIALS_PREFIX_RULE + LASTNAME_GRAMMAR
    )
예제 #14
0
        }])
]

NAMED_ORG_INITIALS_AND_LASTNAME = [
    {
        'labels': [
            gram_in(['Name', 'Abbr']),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    },
    {
        'labels': [
            gram('PUNCT'),
            eq('.'),
        ],
        'normalization': NormalizationType.Original,
    },
    {
        'labels': [
            gram_in(['Patr', 'Abbr']),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    },
    {
        'labels': [
예제 #15
0
class Street(Enum):
    """Token-pattern grammars for street names.

    Every member's value is a list of rule dicts, one dict per position in
    the pattern. Positions are mapped to ``AddressObject`` attributes
    (``Street_Name`` or ``Street_Descriptor``). Most members are assembled
    from slices of earlier members and from the module-level rule lists
    ``SHORT_STREET_DESCRIPTOR_RULE``, ``NUMERIC_STREET_PART_RULE`` and
    ``NUMERIC_STREET_PART_WITHOUT_SUFFIX_RULE``, so definition order matters.
    """

    # Adjective(s) followed by the full descriptor, e.g. "Садовая улица".
    AdjFull = [
        {
            'labels': [gram('ADJF'), gram_not('Abbr')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable': True,
            'optional': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                dictionary(STREET_DESCRIPTOR_DICTIONARY),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Descriptor,
            },
        },
    ]

    # Full descriptor followed by adjective(s), e.g. "улица Садовая".
    AdjFullReversed = [
        {
            'labels': [dictionary(STREET_DESCRIPTOR_DICTIONARY)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Descriptor,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
    ]

    # Short descriptor, then adjective(s), e.g. "ул. Садовая".
    AdjShort = SHORT_STREET_DESCRIPTOR_RULE + AdjFull[:2]

    # Adjective(s), then short descriptor, e.g. "Садовая ул."
    AdjShortReversed = AdjFull[:2] + SHORT_STREET_DESCRIPTOR_RULE

    # Descriptor, adjective(s), genitive noun(s),
    # e.g. "улица Красных Десантников".
    AdjNounFull = AdjFullReversed[:1] + AdjFull[:2] + [
        {
            'labels': [
                gram('gent'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
    ]

    # Short-descriptor variant, e.g. "ул. Красных Десантников".
    AdjNounShort = AdjShort + AdjNounFull[-1:]

    # Descriptor followed by genitive word(s), e.g. "улица Карла Маркса".
    GentFullReversed = [
        AdjFullReversed[0],
        {
            'labels': [gram('gent'), gram_not('Abbr')],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [
                gram('gent'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
    ]

    # Descriptor, one initial with its dot, genitive word(s),
    # e.g. "улица К. Маркса".
    GentFullReversedWithShortcut = GentFullReversed[:1] + [
        {
            'labels': [gram('Abbr')],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': AddressObject.Attributes.Street_Name,
            },
        },
        {
            'labels': [eq('.')],
            'normalization': NormalizationType.Original,
        },
    ] + GentFullReversed[1:]

    # Same with two initials, e.g. "улица В. В. Ленина": the (initial, dot)
    # pair at indices 1-2 appears twice before the genitive rules.
    GentFullReversedWithExtendedShortcut = (
        GentFullReversedWithShortcut[:3] + GentFullReversedWithShortcut[1:]
    )

    # Short descriptor and genitive word(s), e.g. "пр. Маршала жукова".
    GentShortReversed = SHORT_STREET_DESCRIPTOR_RULE + GentFullReversed[1:]

    # Short descriptor, one initial, genitive word(s), e.g. "пр. К. Маркса".
    GentShortReversedWithShortcut = (
        SHORT_STREET_DESCRIPTOR_RULE + GentFullReversedWithShortcut[1:]
    )

    # Short descriptor, two initials, genitive word(s),
    # e.g. "пл. В. В. Ленина".
    GentShortReversedWithExtendedShortcut = (
        SHORT_STREET_DESCRIPTOR_RULE + GentFullReversedWithExtendedShortcut[1:]
    )

    # Genitive word(s) followed by the full descriptor,
    # e.g. "Николая Ершова улица".
    GentFull = GentFullReversed[1:] + [GentFullReversed[0]]

    # Genitive word(s) followed by the short descriptor,
    # e.g. "Обуховской Обороны пр-кт".
    GentShort = GentShortReversed[2:] + SHORT_STREET_DESCRIPTOR_RULE

    # Numeric part before the adjective form, e.g. "1-я новорублевская улица".
    AdjFullWithNumericPart = NUMERIC_STREET_PART_RULE + AdjFull

    # Descriptor first, e.g. "улица 1-я новорублевская".
    AdjFullReversedWithNumericPart = (
        [AdjFullReversed[0]] + AdjFullWithNumericPart[:-1]
    )

    # Short descriptor last, e.g. "1-я новорублевская ул."
    AdjShortWithNumericPart = (
        AdjFullWithNumericPart[:-1] + SHORT_STREET_DESCRIPTOR_RULE
    )

    # Short descriptor first, e.g. "ул. 1-я промышленная".
    AdjShortReversedWithNumericPart = (
        SHORT_STREET_DESCRIPTOR_RULE + AdjFullWithNumericPart[:-1]
    )

    # Descriptor, bare number, genitive words, e.g. "проспект 50 лет октября".
    # The first genitive rule is used twice: once on its own, then again as
    # the head of the full genitive tail.
    GentFullReversedWithNumericPrefix = (
        GentFullReversed[:1]
        + NUMERIC_STREET_PART_WITHOUT_SUFFIX_RULE
        + GentFullReversed[1:2]
        + GentFullReversed[1:]
    )

    # Short-descriptor variant, e.g. "пр-т. 50 лет советской власти".
    GentShortReversedWithNumericPrefix = (
        GentShortReversed[:2]
        + NUMERIC_STREET_PART_WITHOUT_SUFFIX_RULE
        + GentFullReversed[1:2]
        + GentFullReversed[1:]
    )

    # Numeric part, full descriptor, genitive words,
    # e.g. "2-ой проезд Перова Поля".
    GentNumericSplittedByFullDescriptor = (
        NUMERIC_STREET_PART_RULE + GentFullReversed
    )

    # Numeric part, short descriptor, genitive words,
    # e.g. "7-я ул. текстильщиков".
    GentNumericSplittedByShortDescriptor = (
        NUMERIC_STREET_PART_RULE + GentShortReversed
    )