Example #1
def query_and_upload_to_s3(schema, table_export_setting, date_range):
    """
    Query the database and upload the results to S3.

    Args:
        schema (str): Account schema name in which to execute the query.
        table_export_setting (TableExportSetting): Settings for the table export.
        date_range (tuple): Pair of date objects of inclusive start and end dates.

    """
    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    start_date, end_date = date_range
    iterate_daily = table_export_setting.iterate_daily
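    # When iterate_daily, yield every date in the range; otherwise yield only
    # start_date and let the query parameters span the full range.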
    dates_to_iterate = rrule(DAILY,
                             dtstart=start_date,
                             until=end_date if iterate_daily else start_date)

    with connection.cursor() as cursor:
        cursor.db.set_schema(schema)
        for the_date in dates_to_iterate:
            upload_path = get_upload_path(
                schema,
                table_export_setting.provider,
                the_date,
                table_export_setting.output_name,
                iterate_daily,
            )
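            # Only the schema name is interpolated into the SQL text; the date
            # values are bound as query parameters by the driver.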
            cursor.execute(
                table_export_setting.sql.format(schema=schema),
                {
                    'start_date': the_date,
                    'end_date': the_date if iterate_daily else end_date,
                },
            )
            # Skip this date entirely if the result set is empty.
            if cursor.rowcount == 0:
                continue
            with NamedTemporaryGZip() as temp_file:
                writer = csv.writer(temp_file,
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
                for row in cursor.fetchall():
                    writer.writerow(row)
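                # Close to flush the gzip stream before uploading by file name.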
                temp_file.close()
                uploader.upload_file(temp_file.name, upload_path)
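
All three variants assume the same surrounding module context. A minimal sketch of that context, assuming Django and python-dateutil; the project-local helpers (AwsS3Uploader, NamedTemporaryGZip, get_upload_path) and the real _DB_FETCH_BATCH_SIZE value live elsewhere in the module, so the placeholders below are illustrative only:

import csv
import logging

from dateutil.parser import parse        # Example #2 coerces string dates with this
from dateutil.rrule import DAILY, rrule  # daily iteration between start and end dates
from django.conf import settings         # provides S3_BUCKET_NAME and ENABLE_S3_ARCHIVING
from django.db import connection         # cursor.db.set_schema() assumes a tenant-schemas backend

# Project-local helpers assumed by the snippets; the import path below is a
# placeholder, not the project's actual module layout:
# from .uploads import AwsS3Uploader, NamedTemporaryGZip, get_upload_path

LOG = logging.getLogger(__name__)
_DB_FETCH_BATCH_SIZE = 2000  # assumed batch size; the real constant is defined in the module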
Example #2
def query_and_upload_to_s3(schema_name, provider_uuid, table_export_setting,
                           start_date, end_date):
    """
    Query the database and upload the results to S3.

    Args:
        schema_name (str): Account schema name in which to execute the query.
        provider_uuid (UUID): Provider UUID for filtering the query.
        table_export_setting (dict): Settings for the table export.
        start_date (str or datetime): Start date (inclusive).
        end_date (str or datetime): End date (inclusive).

    """
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return

    LOG.info(
        "query_and_upload_to_s3: schema %s provider_uuid %s table.output_name %s for %s",
        schema_name,
        provider_uuid,
        table_export_setting["output_name"],
        (start_date, end_date),
    )
    if isinstance(start_date, str):
        start_date = parse(start_date)
    if isinstance(end_date, str):
        end_date = parse(end_date)

    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    iterate_daily = table_export_setting["iterate_daily"]
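    # When iterate_daily, yield every date in the range; otherwise yield only
    # start_date and let the query parameters span the full range.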
    dates_to_iterate = rrule(DAILY,
                             dtstart=start_date,
                             until=end_date if iterate_daily else start_date)

    for the_date in dates_to_iterate:
        with NamedTemporaryGZip() as temp_file:
            with connection.cursor() as cursor:
                cursor.db.set_schema(schema_name)
                upload_path = get_upload_path(
                    schema_name,
                    table_export_setting["provider"],
                    provider_uuid,
                    the_date,
                    table_export_setting["output_name"],
                    iterate_daily,
                )
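                # Only the schema name is interpolated into the SQL text; the
                # date and provider_uuid values are bound as query parameters.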
                cursor.execute(
                    table_export_setting["sql"].format(schema=schema_name),
                    {
                        "start_date": the_date,
                        "end_date": the_date if iterate_daily else end_date,
                        "provider_uuid": provider_uuid,
                    },
                )
                # Don't upload if result set is empty
                if cursor.rowcount == 0:
                    continue
                writer = csv.writer(temp_file,
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
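                # Fetch in batches to keep memory use bounded on large tables.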
                while True:
                    records = cursor.fetchmany(size=_DB_FETCH_BATCH_SIZE)
                    if not records:
                        break
                    for row in records:
                        writer.writerow(row)
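            # Close to flush the gzip stream before uploading by file name.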
            temp_file.close()
            uploader.upload_file(temp_file.name, upload_path)
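
A hedged usage sketch for this dict-based variant, assuming a configured Django environment; the schema, UUID, table name, and SQL below are placeholders, while the dict keys mirror exactly what the function reads (provider, output_name, iterate_daily, sql):

example_setting = {
    "provider": "aws",                                # placeholder provider type
    "output_name": "reporting_awscostentrylineitem",  # placeholder table name
    "iterate_daily": True,
    "sql": (
        "SELECT * FROM {schema}.reporting_awscostentrylineitem "
        "WHERE usage_start >= %(start_date)s "
        "AND usage_start <= %(end_date)s "
        "AND provider_uuid = %(provider_uuid)s"
    ),
}
query_and_upload_to_s3(
    "acct10001",                             # placeholder schema name
    "6e212746-484a-40cd-bba0-09a19d132d64",  # placeholder provider UUID
    example_setting,
    start_date="2019-01-01",                 # strings are coerced via parse()
    end_date="2019-01-31",
)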
Example #3
def query_and_upload_to_s3(schema_name, provider_uuid, table_export_setting,
                           date_range):
    """
    Query the database and upload the results to S3.

    Args:
        schema_name (str): Account schema name in which to execute the query.
        provider_uuid (UUID): Provider UUID for filtering the query.
        table_export_setting (TableExportSetting): Settings for the table export.
        date_range (tuple): Pair of date objects of inclusive start and end dates.

    """
    LOG.info(
        'query_and_upload_to_s3: schema %s provider_uuid %s table.output_name %s for %s',
        schema_name,
        provider_uuid,
        table_export_setting.output_name,
        date_range,
    )
    uploader = AwsS3Uploader(settings.S3_BUCKET_NAME)
    start_date, end_date = date_range
    iterate_daily = table_export_setting.iterate_daily
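    # When iterate_daily, yield every date in the range; otherwise yield only
    # start_date and let the query parameters span the full range.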
    dates_to_iterate = rrule(DAILY,
                             dtstart=start_date,
                             until=end_date if iterate_daily else start_date)

    for the_date in dates_to_iterate:
        with NamedTemporaryGZip() as temp_file:
            with connection.cursor() as cursor:
                cursor.db.set_schema(schema_name)
                upload_path = get_upload_path(
                    schema_name,
                    table_export_setting.provider,
                    provider_uuid,
                    the_date,
                    table_export_setting.output_name,
                    iterate_daily,
                )
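                # Only the schema name is interpolated into the SQL text; the
                # date and provider_uuid values are bound as query parameters.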
                cursor.execute(
                    table_export_setting.sql.format(schema=schema_name),
                    {
                        'start_date': the_date,
                        'end_date': the_date if iterate_daily else end_date,
                        'provider_uuid': provider_uuid,
                    },
                )
                # Don't upload if result set is empty
                if cursor.rowcount == 0:
                    continue
                writer = csv.writer(temp_file,
                                    quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
                writer.writerow([field.name for field in cursor.description])
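                # Fetch in batches to keep memory use bounded on large tables.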
                while True:
                    records = cursor.fetchmany(size=_DB_FETCH_BATCH_SIZE)
                    if not records:
                        break
                    for row in records:
                        writer.writerow(row)
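            # Close to flush the gzip stream before uploading by file name.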
            temp_file.close()
            uploader.upload_file(temp_file.name, upload_path)
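
Example #3 expects an object with attribute access rather than a dict, and a tuple of date objects rather than strings. A minimal sketch of a call, using a namedtuple as a stand-in for the project's TableExportSetting class and assuming it exposes at least the four fields the function reads:

from collections import namedtuple
from datetime import date

# Stand-in for the real TableExportSetting; the field names match the
# attributes used above (provider, output_name, iterate_daily, sql).
TableExportSetting = namedtuple(
    "TableExportSetting", ["provider", "output_name", "iterate_daily", "sql"]
)

setting = TableExportSetting(
    provider="aws",                                # placeholder provider type
    output_name="reporting_awscostentrylineitem",  # placeholder table name
    iterate_daily=False,
    sql=(
        "SELECT * FROM {schema}.reporting_awscostentrylineitem "
        "WHERE usage_start >= %(start_date)s "
        "AND usage_start <= %(end_date)s "
        "AND provider_uuid = %(provider_uuid)s"
    ),
)
query_and_upload_to_s3(
    "acct10001",                             # placeholder schema name
    "6e212746-484a-40cd-bba0-09a19d132d64",  # placeholder provider UUID
    setting,
    (date(2019, 1, 1), date(2019, 1, 31)),   # inclusive date range
)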