Пример #1
0
    for m in re.finditer(r'(?<=/)([A-Z]+\.?\s*\d+)', text):
        text += ' # {}'.format(m.group(0))

    for m in re.finditer(r'(?<=/)([0-9]+\.\s*\w+)', text):
        text += ' # {}'.format(m.group(0))

    text = text.strip()
    text = re.sub(r'\s+', ' ', text)

    return text


# Strings handed to process_stage() through its ``ignore`` keyword argument.
# NOTE(review): presumably names that must never be linked as matches --
# confirm against how process_stage() treats ``ignore``.
ignore = (
    'Puolukka',
    'Otava',
    'Vaaka',
    'Varsa',
    'Neito',
    'Voima',
    'RPr',  # Cavalry brigade in the Winter War, coastal brigade in the Continuation War
)


if __name__ == '__main__':
    # `<script> test` runs this module's doctests and stops; any other command
    # line is forwarded unchanged to process_stage().
    # Slicing (instead of indexing argv[1]) keeps an invocation without
    # arguments from failing with IndexError before process_stage() is reached.
    if sys.argv[1:2] == ['test']:
        import doctest
        doctest.testmod()
        # sys.exit() instead of the site-provided exit(): the latter is meant
        # for the interactive shell and is absent when run with `python -S`.
        sys.exit()

    process_stage(sys.argv, preprocessor=preprocessor, ignore=ignore)
def pruner(candidate):
    """Return ``candidate`` when all of it matches ``name_re_compiled``, else ``None``."""
    whole_match = name_re_compiled.fullmatch(candidate)
    return candidate if whole_match else None


def set_dataset(dataset_name):
    """Record which dataset is being processed on ``ValidationContext``.

    Prints a short notice and stores the dataset name in
    ``ValidationContext.dataset``.

    Args:
        dataset_name: Either ``'event'`` or ``'photo'``.

    Raises:
        ValueError: If ``dataset_name`` is neither of the accepted values.
    """
    known_datasets = (
        ('event', 'Handling as events'),
        ('photo', 'Handling as photos'),
    )
    for name, notice in known_datasets:
        if dataset_name == name:
            print(notice)
            ValidationContext.dataset = name
            return

    raise ValueError('Invalid dataset: {}'.format(dataset_name))


if __name__ == '__main__':
    # Command line: <script> (test | event | photo) [arguments for process_stage...]
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        sys.exit('Missing dataset argument: expected "event", "photo" or "test"')

    if sys.argv[1] == 'test':
        import doctest
        doctest.testmod()
        # sys.exit() instead of the site-provided exit(): the latter is meant
        # for the interactive shell and is absent when run with `python -S`.
        sys.exit()

    set_dataset(sys.argv[1])

    # Drop the dataset selector so process_stage() only receives the script
    # name followed by the remaining arguments.
    args = sys.argv[0:1] + sys.argv[2:]

    process_stage(args, ignore=ignore, validator_class=Validator,
                  preprocessor=preprocessor, pruner=pruner, log_level='INFO')
Пример #3
0
        pruner_fun = pruner
    elif sys.argv[1] == 'photo':
        print('Handling as photos')
        pruner_fun = None
    else:
        raise ValueError('Invalid dataset')

    # Drop the dataset selector (argv[1]) so process_stage() only receives the
    # script name followed by the remaining arguments.
    args = sys.argv[0:1] + sys.argv[2:]

    # Type URIs passed to process_stage() as ``remove_duplicates``.
    # NOTE(review): presumably the place types for which duplicate matches are
    # dropped -- confirm against process_stage().
    no_duplicates = [
        'http://www.yso.fi/onto/suo/kunta',
        'http://ldf.fi/schema/warsa/Town',
        'http://ldf.fi/schema/warsa/Village',
        'http://ldf.fi/schema/warsa/Body_of_water',
        'http://ldf.fi/schema/warsa/Hypsographic_feature',
        'http://ldf.fi/pnr-schema#place_type_540',
        'http://ldf.fi/pnr-schema#place_type_550',
        'http://ldf.fi/pnr-schema#place_type_560',
        ISLAND_TYPE,  # Selected islands
    ]

    prep = preprocessor
    if args[-1] == 'naive':
        # A trailing 'naive' argument switches off the preprocessor, the ignore
        # list and duplicate removal, and is itself removed from the arguments.
        prep = None
        ignore = None
        no_duplicates = None
        args.pop()

    # Pass ``prep`` (not ``preprocessor``): otherwise the 'naive' switch above
    # would leave preprocessing enabled.
    process_stage(args, ignore=ignore, pruner=pruner_fun, validator_class=Validator,
                  preprocessor=prep, remove_duplicates=no_duplicates)
    ]

    # Extra ignore entries that are appended only when argv[1] is 'event'.
    events_only_ignore = [
        'turtola',  # only for events!
        'pajari'  # only for events, remove for photos
    ]

    # argv[1] selects the dataset; only 'event' extends the ignore list,
    # 'photo' leaves it untouched and anything else is rejected.
    if sys.argv[1] == 'event':
        print('Handling as events')
        ignore = ignore + events_only_ignore
    elif sys.argv[1] == 'photo':
        print('Handling as photos')
    else:
        raise ValueError('Invalid dataset')

    # Drop the dataset selector so process_stage() only receives the script
    # name followed by the remaining arguments.
    args = sys.argv[0:1] + sys.argv[2:]

    # Type URIs passed to process_stage() as ``remove_duplicates``.
    # NOTE(review): presumably the place types for which duplicate matches are
    # dropped -- confirm against process_stage().
    no_duplicates = [
        'http://www.yso.fi/onto/suo/kunta',
        'http://ldf.fi/warsa/places/place_types/Kirkonkyla_kaupunki',
        'http://ldf.fi/warsa/places/place_types/Kyla',
        'http://ldf.fi/warsa/places/place_types/Vesimuodostuma',
        'http://ldf.fi/warsa/places/place_types/Maastokohde',
        'http://ldf.fi/pnr-schema#place_type_540',
        'http://ldf.fi/pnr-schema#place_type_550',
        'http://ldf.fi/pnr-schema#place_type_560',
    ]

    process_stage(args, ignore=ignore, validator_class=Validator,
            preprocessor=preprocessor, remove_duplicates=no_duplicates)
Пример #5
0
from arpa_linker.link_helper import process_stage
import logging
import sys

# Handle for the 'arpa_linker.arpa' logger; it is not referenced again in the
# visible part of this script.
logger = logging.getLogger('arpa_linker.arpa')

if __name__ == '__main__':

    # Forward the raw command line to process_stage(), passing
    # log_level='INFO'; no preprocessor, validator or ignore list is supplied.
    process_stage(sys.argv, log_level='INFO')
Пример #6
0
)

if __name__ == '__main__':
    # `<script> test` runs this module's doctests and stops.
    # Slicing (instead of indexing argv[1]) keeps an invocation without
    # arguments from failing with IndexError.
    if sys.argv[1:2] == ['test']:
        import doctest
        doctest.testmod()
        # sys.exit() instead of the site-provided exit(): the latter is meant
        # for the interactive shell and is absent when run with `python -S`.
        sys.exit()

    # Optional switches are only looked for among the last two arguments; each
    # one found flips a Validator class attribute and is removed from argv.
    # NOTE(review): a trailing 'naive' also occupies one of those two slots, so
    # 'no_cover', 'no_length_filter' and 'naive' cannot all be combined --
    # confirm whether that is intended.
    special_args = sys.argv[-2:]
    if 'no_cover' in special_args:
        Validator.accept_cover = False
        sys.argv.remove('no_cover')
    if 'no_length_filter' in special_args:
        Validator.filter_by_length = False
        sys.argv.remove('no_length_filter')

    prep = preprocessor
    if sys.argv[-1] == 'naive':
        # A trailing 'naive' argument switches off the preprocessor and the
        # ignore list, and is itself removed from argv.
        prep = None
        ignore = None
        sys.argv.pop()

    # The ignore list is also dropped for this specific fifth argument. The
    # length check avoids an IndexError when fewer arguments are given.
    if len(sys.argv) > 4 and sys.argv[4] == 'battle_unit_linked.ttl':
        ignore = None

    process_stage(sys.argv,
                  preprocessor=prep,
                  ignore=ignore,
                  validator_class=Validator,
                  log_level='INFO')
Пример #7
0
        ValidationContext.dataset = 'event'
    elif dataset_name == 'photo':
        print('Handling as photos')
        ValidationContext.dataset = 'photo'
    else:
        raise ValueError('Invalid dataset: {}'.format(dataset_name))


if __name__ == '__main__':
    # Command line: <script> (test | <dataset>) [arguments for process_stage...] [naive]
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        sys.exit('Missing dataset argument: expected "event", "photo" or "test"')

    if sys.argv[1] == 'test':
        import doctest
        doctest.testmod()
        # sys.exit() instead of the site-provided exit(): the latter is meant
        # for the interactive shell and is absent when run with `python -S`.
        sys.exit()

    set_dataset(sys.argv[1])

    # Drop the dataset selector so process_stage() only receives the script
    # name followed by the remaining arguments.
    args = sys.argv[0:1] + sys.argv[2:]

    prep = preprocessor
    if args[-1] == 'naive':
        # A trailing 'naive' argument switches off the preprocessor and the
        # ignore list, and is itself removed from the arguments.
        prep = None
        ignore = None
        args.pop()

    process_stage(args,
                  ignore=ignore,
                  validator_class=Validator,
                  preprocessor=prep,
                  pruner=pruner,
                  log_level='DEBUG')