Пример #1
0
def sync_report(stream_name, stream_metadata, sdk_client):
    """Sync an AdWords report stream one day at a time.

    Writes the stream schema, resolves the selected fields to their
    ``adwords.fieldName`` values, then calls ``sync_report_for_day`` for each
    day from the computed start date through ``get_end_date()``. After every
    day the next date to sync is stored under the
    ``last_attribution_window_date`` bookmark; the bookmark is cleared once
    the loop finishes.

    Args:
        stream_name: Name of the report stream.
        stream_metadata: Metadata map, indexed here by breadcrumb tuples such
            as ``('properties', <field>)``.
        sdk_client: Client object; its ``client_customer_id`` is read and the
            object is forwarded to the schema/report helpers.
    """
    customer_id = sdk_client.client_customer_id

    report_schema, _ = create_schema_for_report(stream_name, sdk_client)
    report_schema = add_synthetic_keys_to_stream_schema(report_schema)

    selected_attributes = get_fields_to_sync(report_schema, stream_metadata)

    key_properties = metadata.get(
        stream_metadata, (), 'tap-adwords.report-key-properties') or []
    LOGGER.info("{} primary keys are {}".format(stream_name, key_properties))

    write_schema(stream_name,
                 report_schema,
                 key_properties,
                 bookmark_properties=['day'])

    # Translate each selected schema attribute into its AdWords field name.
    field_list = [
        stream_metadata[('properties', attribute)]['adwords.fieldName']
        for attribute in selected_attributes
    ]

    check_selected_fields(stream_name, field_list, sdk_client)

    # If an attribution window sync is interrupted, start where it left off
    current_day = get_attribution_window_bookmark(customer_id, stream_name)
    if current_day is None:
        current_day = apply_conversion_window(
            get_start_for_stream(customer_id, stream_name))

    if stream_name in REPORTS_WITH_90_DAY_MAX:
        # Never start earlier than 90 days before now for these reports.
        current_day = max(current_day, utils.now() + relativedelta(days=-90))

    LOGGER.info('Selected fields: %s', field_list)

    while current_day <= get_end_date():
        sync_report_for_day(stream_name, report_schema, sdk_client,
                            current_day, field_list)
        current_day += relativedelta(days=1)
        # Record the next day to sync so an interrupted run can resume there.
        bookmarks.write_bookmark(
            STATE,
            state_key_name(customer_id, stream_name),
            'last_attribution_window_date',
            utils.strftime(current_day))
        singer.write_state(STATE)

    # The whole window completed, so the resume marker is no longer needed.
    bookmarks.clear_bookmark(
        STATE,
        state_key_name(customer_id, stream_name),
        'last_attribution_window_date')
    singer.write_state(STATE)
    LOGGER.info("Done syncing the %s report for customer_id %s", stream_name,
                customer_id)
Пример #2
0
def sync_statistics_report(config, state, stream, sdk_client, token):
    """Sync a stream which is backed by the Criteo Statistics endpoint.

    Writes the stream schema, splits the selected fields into dimensions and
    metrics using the ``tap-criteo.behaviour`` metadata entry, validates the
    selection, and then calls ``sync_statistics_for_day`` for each day from
    the computed start date through ``get_end_date(config)``. The auth token
    is refreshed before every day. After each day the next date to sync is
    stored under the ``last_attribution_window_date`` bookmark, which is
    cleared once the loop finishes.

    Args:
        config: Tap configuration mapping; ``advertiser_ids`` is read from it.
        state: State object passed to the bookmark helpers and emitted via
            ``singer.write_state``.
        stream: Catalog stream entry; its ``metadata``, ``stream`` and
            ``schema`` attributes are used.
        sdk_client: Client forwarded to the token-refresh and per-day helpers.
        token: Current auth token; replaced by ``refresh_auth_token`` on each
            iteration.

    Raises:
        ValueError: If more than 3 dimensions are selected, or if no metric
            is selected.
    """
    advertiser_ids = config.get("advertiser_ids", "")
    mdata = metadata.to_map(stream.metadata)

    stream = add_synthetic_keys_to_stream_schema(stream)
    stream_id = stream.stream
    bookmark_name = "last_attribution_window_date"

    field_list = get_field_list(stream)

    def fields_with_behaviour(behaviour):
        # Keep the selected fields whose metadata marks them as `behaviour`.
        return [
            name for name in field_list
            if metadata.get(mdata, ("properties", name),
                            "tap-criteo.behaviour") == behaviour
        ]

    primary_keys = []
    LOGGER.info("{} primary keys are {}".format(stream_id, primary_keys))
    singer.write_schema(
        stream_id,
        stream.schema.to_dict(),
        primary_keys,
        bookmark_properties=["Day"],
    )

    # If an attribution window sync is interrupted, start where it left off
    current_day = get_attribution_window_bookmark(state, advertiser_ids,
                                                  stream_id)
    if current_day is None:
        earliest = get_start_for_stream(config, state, advertiser_ids,
                                        stream_id)
        current_day = apply_conversion_window(config, earliest)

    # According to Criteo's documentation the StatisticsApi only supports
    # between one and three dimensions and at least one metric.
    # Only the upper bound on dimensions is enforced here.
    report_dimensions = fields_with_behaviour("dimension")
    LOGGER.info("Selected dimensions: %s", report_dimensions)
    if len(report_dimensions) > 3:
        raise ValueError(
            "%s stream only supports up to 3 selected dimensions" %
            stream.stream)

    report_metrics = fields_with_behaviour("metric")
    LOGGER.info("Selected metrics: %s", report_metrics)
    if not report_metrics:
        raise ValueError("%s stream must have at least 1 selected metric" %
                         stream.stream)

    while current_day <= get_end_date(config):
        token = refresh_auth_token(sdk_client, token)
        sync_statistics_for_day(
            config,
            state,
            stream,
            sdk_client,
            token,
            current_day,
            report_metrics,
            report_dimensions,
        )
        current_day += relativedelta(days=1)
        # Record the next day to sync so an interrupted run can resume there.
        bookmarks.write_bookmark(
            state,
            state_key_name(advertiser_ids, stream.stream),
            bookmark_name,
            utils.strftime(current_day),
        )
        singer.write_state(state)

    # The whole window completed, so the resume marker is no longer needed.
    bookmarks.clear_bookmark(
        state,
        state_key_name(advertiser_ids, stream.stream),
        bookmark_name,
    )
    singer.write_state(state)
    LOGGER.info(
        "Done syncing the %s report for advertiser_ids %s",
        stream_id,
        advertiser_ids,
    )
Пример #3
0
def sync_report(stream_name, stream_metadata, sdk_client):
    """Sync an AdWords report stream in date windows, bookmarking progress.

    Writes the stream schema, resolves the selected fields to their
    ``adwords.fieldName`` values, and then requests the report window by
    window via ``sync_report_for_day``. Streams listed in
    ``REPORTS_REQUIRING_DAILY_REPORTS`` are requested one day per call; all
    other streams cover ``MAX_REPORT_TIME_WINDOW`` days (default 365) past the
    window start per call. After every window the last date actually
    requested is stored under the ``last_attribution_window_date`` bookmark.
    The bookmark is cleared at the end unless the stream's replication method
    is ``INCREMENTAL``.

    Args:
        stream_name: Name of the report stream.
        stream_metadata: Metadata map, indexed here by breadcrumb tuples such
            as ``('properties', <field>)``.
        sdk_client: Client object; its ``client_customer_id`` is read and the
            object is forwarded to the schema/report helpers.
    """
    report_window_days = CONFIG.get("MAX_REPORT_TIME_WINDOW", 365)

    is_incremental = metadata.get(
        stream_metadata, (), "replication-method") == "INCREMENTAL"

    customer_id = sdk_client.client_customer_id

    stream_schema, _ = create_schema_for_report(stream_name, sdk_client)
    stream_schema = add_synthetic_keys_to_stream_schema(stream_schema)

    xml_attribute_list = get_fields_to_sync(stream_schema, stream_metadata)

    primary_keys = metadata.get(stream_metadata,
                                (), 'tap-adwords.report-key-properties') or []
    LOGGER.info("{} primary keys are {}".format(stream_name, primary_keys))

    write_schema(stream_name,
                 stream_schema,
                 primary_keys,
                 bookmark_properties=['day'])

    # Translate each selected schema attribute into its AdWords field name.
    field_list = [
        stream_metadata[('properties', field)]['adwords.fieldName']
        for field in xml_attribute_list
    ]

    check_selected_fields(stream_name, field_list, sdk_client)

    # If an attribution window sync is interrupted, start where it left off.
    # The bookmark holds the last date already requested, so resume on the
    # following day.
    start_date = get_attribution_window_bookmark(customer_id, stream_name)
    if start_date is not None:
        start_date = start_date + relativedelta(days=1)
    else:
        start_date = apply_conversion_window(
            get_start_for_stream(customer_id, stream_name))

    if stream_name in REPORTS_WITH_90_DAY_MAX:
        cutoff = utils.now() + relativedelta(days=-90)
        if start_date < cutoff:
            # Report the date the sync will really start from (the cutoff),
            # not the too-early date that is being discarded.
            LOGGER.warning(
                "report only supports up to 90 days, will start at {}".format(
                    cutoff))
            start_date = cutoff

    # Windows always begin at midnight of their first day.
    start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)

    LOGGER.info('Selected fields: %s', field_list)

    # Never request past yesterday, nor past the configured end date; the
    # final day is included up to its last second.
    max_end_date = utils.now() - relativedelta(days=1)
    required_end_date = get_end_date()
    report_end_date = min(max_end_date, required_end_date).replace(
        hour=23, minute=59, second=59, microsecond=0)

    is_single_day_report = stream_name in REPORTS_REQUIRING_DAILY_REPORTS
    bookmark_key = 'last_attribution_window_date'

    next_start_date = start_date
    while next_start_date <= report_end_date:
        # Unclamped end of this window: the start day itself for daily
        # reports, otherwise `report_window_days` later.
        window_end = next_start_date
        if not is_single_day_report:
            window_end += relativedelta(days=report_window_days)

        # Clamp before logging so the log shows the range really requested.
        actual_end_date = min(window_end, report_end_date)
        singer.log_info("syncing %s for %s - %s", stream_name,
                        next_start_date.strftime("%Y-%m-%d"),
                        actual_end_date.strftime("%Y-%m-%d"))
        sync_report_for_day(stream_name, stream_schema, sdk_client,
                            next_start_date, field_list, actual_end_date)

        bookmarks.write_bookmark(STATE,
                                 state_key_name(customer_id, stream_name),
                                 bookmark_key,
                                 actual_end_date.strftime(utils.DATETIME_FMT))
        singer.write_state(STATE)

        next_start_date = window_end + relativedelta(days=1)

    # Incremental streams keep the bookmark so the next run continues from
    # it; otherwise the resume marker is dropped once the sync completes.
    if not is_incremental:
        bookmarks.clear_bookmark(STATE,
                                 state_key_name(customer_id, stream_name),
                                 bookmark_key)
    singer.write_state(STATE)
    LOGGER.info("Done syncing the %s report for customer_id %s", stream_name,
                customer_id)