Example #1
def do_sync(args):
    logger.info('Starting sync.')

    meltano_config = load_json_file(args.config)
    bucket_files_definition = meltano_config.get("bucket_files_definition")
    if bucket_files_definition:
        if os.path.isfile(bucket_files_definition):
            config = tap_s3_csv.config.load(bucket_files_definition)
        else:
            logger.error("tap_s3_csv: '{}' file not found".format(
                bucket_files_definition))
            exit(1)
    else:
        # No separate definition file: validate the module-level CONFIG
        # loaded at startup and sync from it directly.
        check_config(CONFIG, REQUIRED_CONFIG_KEYS)
        config = CONFIG

    state = load_state(args.state)

    for table in config['tables']:
        state = sync_table(config, state, table)

    state = {'COMPLETED': True}
    singer.write_state(state)

    logger.info('Done syncing.')
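
A minimal, self-contained sketch of the config-resolution step in Example #1. The helper name resolve_table_config is hypothetical, and plain json reads stand in for load_json_file and tap_s3_csv.config.load.

import json
import os


def resolve_table_config(meltano_config_path):
    # Read the Meltano-level config file (stand-in for load_json_file).
    with open(meltano_config_path) as f:
        meltano_config = json.load(f)

    # Prefer a separate bucket files definition file when one is configured
    # and actually exists on disk.
    bucket_files_definition = meltano_config.get("bucket_files_definition")
    if bucket_files_definition and os.path.isfile(bucket_files_definition):
        with open(bucket_files_definition) as f:
            return json.load(f)

    # Otherwise fall back to the config that was already loaded.
    return meltano_config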
Example #2
def sync_table_file(config, s3_file, table_spec, schema):
    logger.info('Syncing file "{}".'.format(s3_file))

    bucket = config['bucket']
    table_name = table_spec['name']

    iterator = tap_s3_csv.format_handler.get_row_iterator(
        config, table_spec, s3_file)

    records_synced = 0

    for row in iterator:
        metadata = {
            '_s3_source_bucket': bucket,
            '_s3_source_file': s3_file,

            # records_synced is zero-based: +1 to make it one-based,
            # +1 more to account for the header row
            '_s3_source_lineno': records_synced + 2
        }

        try:
            to_write = [{**conversion.convert_row(row, schema), **metadata}]
            singer.write_records(table_name, to_write)
        except BrokenPipeError as bpe:
            logger.error(
                f'Pipe to loader broke after {records_synced} records were '
                f'written from {s3_file}: troubled line was {row}'
            )
            raise bpe

        records_synced += 1

    return records_synced
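
A minimal sketch of the per-row metadata pattern from Example #2, run against a local CSV file instead of an S3 object. The helper name annotate_rows, the bucket name, and the file path are placeholder assumptions.

import csv


def annotate_rows(csv_path, bucket='example-bucket'):
    # Yield each data row with the same source metadata Example #2 attaches.
    with open(csv_path, newline='') as f:
        for records_synced, row in enumerate(csv.DictReader(f)):
            yield {
                **row,
                '_s3_source_bucket': bucket,
                '_s3_source_file': csv_path,
                # zero-based counter, +1 for the header row, +1 for one-based lines
                '_s3_source_lineno': records_synced + 2,
            }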