def sync(client, config, catalog, state):
    start_date = config.get('start_date')
    spreadsheet_id = config.get('spreadsheet_id')

    # Get selected_streams from catalog, based on state last_stream
    #   last_stream = previously/currently synced stream, if the load was interrupted
    last_stream = singer.get_currently_syncing(state)
    LOGGER.info('last/currently syncing stream: {}'.format(last_stream))
    selected_streams = []
    for stream in catalog.get_selected_streams(state):
        selected_streams.append(stream.stream)
    LOGGER.info('selected_streams: {}'.format(selected_streams))

    if not selected_streams:
        return

    # FILE_METADATA
    file_metadata = {}
    stream_name = 'file_metadata'
    file_metadata_config = STREAMS.get(stream_name)

    # GET file_metadata
    LOGGER.info('GET file_metadata')
    file_metadata, time_extracted = get_data(
        stream_name=stream_name,
        endpoint_config=file_metadata_config,
        client=client,
        spreadsheet_id=spreadsheet_id)

    # Transform file_metadata
    LOGGER.info('Transform file_metadata')
    file_metadata_tf = transform_file_metadata(file_metadata)
    # LOGGER.info('file_metadata_tf = {}'.format(file_metadata_tf))

    # Check if file has changed; if not, return (back to __init__)
    last_datetime = strptime_to_utc(get_bookmark(state, stream_name, start_date))
    this_datetime = strptime_to_utc(file_metadata.get('modifiedTime'))
    LOGGER.info('last_datetime = {}, this_datetime = {}'.format(last_datetime, this_datetime))
    if this_datetime <= last_datetime:
        LOGGER.info('this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.')
        # Update file_metadata bookmark
        write_bookmark(state, 'file_metadata', strftime(this_datetime))
        return

    # Sync file_metadata if selected
    sync_stream(stream_name, selected_streams, catalog, state, file_metadata_tf, time_extracted)
    # file_metadata bookmark is updated at the end of sync

    # SPREADSHEET_METADATA
    spreadsheet_metadata = {}
    stream_name = 'spreadsheet_metadata'
    spreadsheet_metadata_config = STREAMS.get(stream_name)

    # GET spreadsheet_metadata
    LOGGER.info('GET spreadsheet_metadata')
    spreadsheet_metadata, ss_time_extracted = get_data(
        stream_name=stream_name,
        endpoint_config=spreadsheet_metadata_config,
        client=client,
        spreadsheet_id=spreadsheet_id)

    # Transform spreadsheet_metadata
    LOGGER.info('Transform spreadsheet_metadata')
    spreadsheet_metadata_tf = transform_spreadsheet_metadata(spreadsheet_metadata)

    # Sync spreadsheet_metadata if selected
    sync_stream(stream_name, selected_streams, catalog, state, spreadsheet_metadata_tf,
                ss_time_extracted)

    # SHEET_METADATA and SHEET_DATA
    sheets = spreadsheet_metadata.get('sheets')
    sheet_metadata = []
    sheets_loaded = []
    sheets_loaded_config = STREAMS['sheets_loaded']
    if sheets:
        # Loop thru sheets (worksheet tabs) in spreadsheet
        for sheet in sheets:
            sheet_title = sheet.get('properties', {}).get('title')
            sheet_id = sheet.get('properties', {}).get('sheetId')

            # GET sheet_metadata and columns
            sheet_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
            # LOGGER.info('sheet_schema: {}'.format(sheet_schema))

            # SKIP empty sheets (where sheet_schema and columns are None)
            if not sheet_schema or not columns:
                LOGGER.info('SKIPPING Empty Sheet: {}'.format(sheet_title))
            else:
                # Transform sheet_metadata
                sheet_metadata_tf = transform_sheet_metadata(spreadsheet_id, sheet, columns)
                # LOGGER.info('sheet_metadata_tf = {}'.format(sheet_metadata_tf))
                sheet_metadata.append(sheet_metadata_tf)

                # SHEET_DATA
                # Should this worksheet tab be synced?
                if sheet_title in selected_streams:
                    LOGGER.info('STARTED Syncing Sheet {}'.format(sheet_title))
                    update_currently_syncing(state, sheet_title)
                    selected_fields = get_selected_fields(catalog, sheet_title)
                    LOGGER.info('Stream: {}, selected_fields: {}'.format(
                        sheet_title, selected_fields))
                    write_schema(catalog, sheet_title)

                    # Emit a Singer ACTIVATE_VERSION message before the initial sync (but not
                    # subsequent syncs) and every time after each sheet sync is complete.
                    # This forces hard deletes on the data downstream if fewer records are sent.
                    # https://github.com/singer-io/singer-python/blob/master/singer/messages.py#L137
                    last_integer = int(get_bookmark(state, sheet_title, 0))
                    activate_version = int(time.time() * 1000)
                    activate_version_message = singer.ActivateVersionMessage(
                        stream=sheet_title,
                        version=activate_version)
                    if last_integer == 0:
                        # initial load, send activate_version before AND after data sync
                        singer.write_message(activate_version_message)
                        LOGGER.info('INITIAL SYNC, Stream: {}, Activate Version: {}'.format(
                            sheet_title, activate_version))

                    # Determine max range of columns and rows for "paging" through the data
                    sheet_last_col_index = 1
                    sheet_last_col_letter = 'A'
                    for col in columns:
                        col_index = col.get('columnIndex')
                        col_letter = col.get('columnLetter')
                        if col_index > sheet_last_col_index:
                            sheet_last_col_index = col_index
                            sheet_last_col_letter = col_letter
                    sheet_max_row = sheet.get('properties').get('gridProperties', {}).get('rowCount')

                    # Initialize paging for 1st batch
                    is_last_row = False
                    batch_rows = 200
                    from_row = 2
                    if sheet_max_row < batch_rows:
                        to_row = sheet_max_row
                    else:
                        to_row = batch_rows

                    # Loop thru batches (each having 200 rows of data)
                    while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row:
                        range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row)

                        # GET sheets_loaded for a worksheet tab
                        sheet_data, time_extracted = get_data(
                            stream_name='sheets_loaded',
                            endpoint_config=sheets_loaded_config,
                            client=client,
                            sheet_title=sheet_title,
                            spreadsheet_id=spreadsheet_id,
                            range_rows=range_rows)
                        # Data is returned as a list of arrays, an array of values for each row
                        sheet_data_rows = sheet_data.get('values', [])

                        # Transform batch of rows to JSON with keys for each column
                        sheet_data_tf, row_num = transform_sheet_data(
                            spreadsheet_id=spreadsheet_id,
                            sheet_id=sheet_id,
                            sheet_title=sheet_title,
                            from_row=from_row,
                            columns=columns,
                            sheet_data_rows=sheet_data_rows)
                        if row_num < to_row:
                            is_last_row = True

                        # Process records, send batch of records to target
                        record_count = process_records(
                            catalog=catalog,
                            stream_name=sheet_title,
                            records=sheet_data_tf,
                            time_extracted=ss_time_extracted,
                            version=activate_version)
                        LOGGER.info('Sheet: {}, records processed: {}'.format(
                            sheet_title, record_count))

                        # Update paging from/to_row for next batch
                        from_row = to_row + 1
                        if to_row + batch_rows > sheet_max_row:
                            to_row = sheet_max_row
                        else:
                            to_row = to_row + batch_rows

                    # End of Stream: Send Activate Version and update State
                    singer.write_message(activate_version_message)
                    write_bookmark(state, sheet_title, activate_version)
                    LOGGER.info('COMPLETE SYNC, Stream: {}, Activate Version: {}'.format(
                        sheet_title, activate_version))
                    LOGGER.info('FINISHED Syncing Sheet {}, Total Rows: {}'.format(
                        sheet_title, row_num - 2))  # row_num ends one past the last data row; minus 2 also excludes the header row
                    update_currently_syncing(state, None)

                    # SHEETS_LOADED
                    # Add sheet to sheets_loaded
                    sheet_loaded = {}
                    sheet_loaded['spreadsheetId'] = spreadsheet_id
                    sheet_loaded['sheetId'] = sheet_id
                    sheet_loaded['title'] = sheet_title
                    sheet_loaded['loadDate'] = strftime(utils.now())
                    sheet_loaded['lastRowNumber'] = row_num
                    sheets_loaded.append(sheet_loaded)

    stream_name = 'sheet_metadata'
    # Sync sheet_metadata if selected
    sync_stream(stream_name, selected_streams, catalog, state, sheet_metadata)

    stream_name = 'sheets_loaded'
    # Sync sheets_loaded if selected
    sync_stream(stream_name, selected_streams, catalog, state, sheets_loaded)

    # Update file_metadata bookmark
    write_bookmark(state, 'file_metadata', strftime(this_datetime))

    return
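# Illustrative sketch (not part of the tap): the A1-notation paging used in the sheet loop
# above, reduced to its range arithmetic. Data starts at row 2 (row 1 is the header) and is
# read in batch_rows-sized slices; the is_last_row short-circuit is omitted here. For a
# 450-row, 4-column sheet this yields ['A2:D200', 'A201:D400', 'A401:D450'].
def _example_sheet_paging_ranges(sheet_max_row=450, last_col_letter='D', batch_rows=200):
    ranges = []
    from_row = 2
    to_row = min(batch_rows, sheet_max_row)
    while from_row < sheet_max_row and to_row <= sheet_max_row:
        ranges.append('A{}:{}{}'.format(from_row, last_col_letter, to_row))
        from_row = to_row + 1
        to_row = min(to_row + batch_rows, sheet_max_row)
    return ranges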
def test_small_years(self):
    self.assertEqual(
        u.strftime(dt(90, 1, 1, tzinfo=pytz.UTC)),
        '0090-01-01T00:00:00.000000Z')
def append_times_to_dates(item, date_fields):
    if date_fields:
        for date_field in date_fields:
            if item.get(date_field):
                item[date_field] = utils.strftime(
                    utils.strptime_with_tz(item[date_field]))
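# Illustrative example (not part of the original tap), assuming `utils` is singer-python's
# singer.utils as in the helper above: date-only strings are re-serialized with an explicit
# UTC time component, and fields missing from the record are left untouched.
def _example_append_times_to_dates():
    item = {'created_at': '2019-06-01', 'name': 'widget'}
    append_times_to_dates(item, ['created_at', 'updated_at'])
    assert item['created_at'] == '2019-06-01T00:00:00.000000Z'
    assert 'updated_at' not in item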
def sync_records(sf, catalog_entry, state, counter):
    chunked_bookmark = singer_utils.strptime_with_tz(sf.get_start_date(state, catalog_entry))
    stream = catalog_entry['stream']
    schema = catalog_entry['schema']
    stream_alias = catalog_entry.get('stream_alias')
    catalog_metadata = metadata.to_map(catalog_entry['metadata'])
    replication_key = catalog_metadata.get((), {}).get('replication-key')
    stream_version = get_stream_version(catalog_entry, state)
    activate_version_message = singer.ActivateVersionMessage(stream=(stream_alias or stream),
                                                             version=stream_version)

    start_time = singer_utils.now()

    LOGGER.info('Syncing Salesforce data for stream %s', stream)

    for rec in sf.query(catalog_entry, state):
        counter.increment()
        with Transformer(pre_hook=transform_bulk_data_hook) as transformer:
            rec = transformer.transform(rec, schema)
        rec = fix_record_anytype(rec, schema)
        singer.write_message(
            singer.RecordMessage(
                stream=(stream_alias or stream),
                record=rec,
                version=stream_version,
                time_extracted=start_time))

        replication_key_value = replication_key and singer_utils.strptime_with_tz(rec[replication_key])

        if sf.pk_chunking:
            if replication_key_value and replication_key_value <= start_time and \
               replication_key_value > chunked_bookmark:
                # Replace the highest seen bookmark and save the state in case we need to resume later
                chunked_bookmark = singer_utils.strptime_with_tz(rec[replication_key])
                state = singer.write_bookmark(
                    state,
                    catalog_entry['tap_stream_id'],
                    'JobHighestBookmarkSeen',
                    singer_utils.strftime(chunked_bookmark))
                singer.write_state(state)
        # Before writing a bookmark, make sure Salesforce has not given us a
        # record with one outside our range
        elif replication_key_value and replication_key_value <= start_time:
            state = singer.write_bookmark(
                state,
                catalog_entry['tap_stream_id'],
                replication_key,
                rec[replication_key])
            singer.write_state(state)

    # Tables with no replication_key will send an
    # activate_version message for the next sync
    if not replication_key:
        singer.write_message(activate_version_message)
        state = singer.write_bookmark(
            state,
            catalog_entry['tap_stream_id'],
            'version',
            None)

    # If pk_chunking is set, only write a bookmark at the end
    if sf.pk_chunking:
        # Write a bookmark with the highest value we've seen
        state = singer.write_bookmark(
            state,
            catalog_entry['tap_stream_id'],
            replication_key,
            singer_utils.strftime(chunked_bookmark))
def test_round_trip(self):
    now = dt.utcnow().replace(tzinfo=pytz.UTC)
    dtime = u.strftime(now)
    pdtime = u.strptime_to_utc(dtime)
    fdtime = u.strftime(pdtime)
    self.assertEqual(dtime, fdtime)
def get_objects(self):
    updated_at_min = self.get_bookmark()

    stop_time = singer.utils.now().replace(microsecond=0)
    date_window_size = float(Context.config.get("date_window_size", DATE_WINDOW_SIZE))

    # Page through till the end of the resultset
    while updated_at_min < stop_time:
        # Bookmarking can also occur on the since_id
        since_id = self.get_since_id() or 1

        if since_id != 1:
            LOGGER.info("Resuming sync from since_id %d", since_id)

        # It's important that `updated_at_min` has microseconds
        # truncated. Why has been lost to the mists of time but we
        # think it has something to do with how the API treats
        # microseconds on its date windows. Maybe it's possible to
        # drop data due to rounding errors or something like that?
        updated_at_max = updated_at_min + datetime.timedelta(days=date_window_size)
        if updated_at_max > stop_time:
            updated_at_max = stop_time
        while True:
            status_key = self.status_key or "status"
            query_params = self.get_query_params(since_id, status_key, updated_at_min, updated_at_max)

            with metrics.http_request_timer(self.name):
                objects = self.call_api(query_params)

            for obj in objects:
                if obj.id < since_id:
                    # This verifies the api behavior expectation we
                    # have that all results actually honor the
                    # since_id parameter.
                    raise OutOfOrderIdsError("obj.id < since_id: {} < {}".format(
                        obj.id, since_id))
                yield obj

            # You know you're at the end when the current page has
            # less than the request size limits you set.
            if len(objects) < self.results_per_page:
                # Save the updated_at_max as our bookmark as we've synced all rows up in our
                # window and can move forward. Also remove the since_id because we want to
                # restart at 1.
                Context.state.get('bookmarks', {}).get(self.name, {}).pop('since_id', None)
                self.update_bookmark(utils.strftime(updated_at_max))
                break

            if objects[-1].id != max([o.id for o in objects]):
                # This verifies the api behavior expectation we have
                # that all pages are internally ordered by the
                # `since_id`.
                raise OutOfOrderIdsError("{} is not the max id in objects ({})".format(
                    objects[-1].id, max([o.id for o in objects])))
            since_id = objects[-1].id

            # Put since_id into the state.
            self.update_bookmark(since_id, bookmark_key='since_id')

        updated_at_min = updated_at_max
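# Illustrative check of the date-window math above (not part of the tap): each outer pass
# advances updated_at_min by date_window_size days, clamping the final window to stop_time.
def _example_date_windows():
    import datetime
    window = datetime.timedelta(days=3)
    start = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
    stop = datetime.datetime(2020, 1, 8, 12, tzinfo=datetime.timezone.utc)
    windows = []
    updated_at_min = start
    while updated_at_min < stop:
        updated_at_max = min(updated_at_min + window, stop)
        windows.append((updated_at_min, updated_at_max))
        updated_at_min = updated_at_max
    assert len(windows) == 3  # Jan 1-4, Jan 4-7, Jan 7-8 12:00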
def sync_endpoint(client,
                  catalog,
                  state,
                  start_date,
                  stream_name,
                  path,
                  endpoint_config,
                  static_params,
                  bookmark_query_field=None,
                  bookmark_field=None,
                  bookmark_type=None,
                  data_key=None,
                  id_fields=None,
                  selected_streams=None,
                  parent=None,
                  parent_id=None):

    # Get the latest bookmark for the stream and set the last_integer/datetime
    last_datetime = None
    last_integer = None
    max_bookmark_value = None
    if bookmark_type == 'integer':
        last_integer = get_bookmark(state, stream_name, 0)
        max_bookmark_value = last_integer
    else:
        last_datetime = get_bookmark(state, stream_name, start_date)
        max_bookmark_value = last_datetime

    end_dttm = utils.now()
    end_dt = end_dttm.date()
    start_dttm = end_dttm
    start_dt = end_dt
    if bookmark_query_field:
        if bookmark_type == 'datetime':
            start_dttm = strptime_to_utc(last_datetime)
            start_dt = start_dttm.date()
            start_dt_str = strftime(start_dttm)[0:10]

    # date_list provides one date for each date in range
    # Most endpoints, without a bookmark query field, will have a single date (today)
    # Clicks endpoint will have a date for each day from bookmark to today
    date_list = [
        str(start_dt + timedelta(days=x))
        for x in range((end_dt - start_dt).days + 1)
    ]

    endpoint_total = 0
    total_records = 0
    limit = 1000  # PageSize (default for API is 100)

    for bookmark_date in date_list:
        page = 1
        offset = 0
        total_records = 0
        if stream_name == 'clicks':
            LOGGER.info('Stream: {}, Syncing bookmark_date = {}'.format(
                stream_name, bookmark_date))
        next_url = '{}/{}.json'.format(client.base_url, path)

        while next_url:
            # Squash params to query-string params
            params = {
                "PageSize": limit,
                **static_params  # adds in endpoint specific, sort, filter params
            }

            if bookmark_query_field:
                if bookmark_type == 'datetime':
                    params[bookmark_query_field] = bookmark_date
                elif bookmark_type == 'integer':
                    params[bookmark_query_field] = last_integer

            if page == 1 and not params == {}:
                param_string = '&'.join(['%s=%s' % (key, value)
                                         for (key, value) in params.items()])
                querystring = param_string.replace(
                    '<parent_id>', str(parent_id)).replace(
                        '<last_datetime>',
                        strptime_to_utc(last_datetime).strftime('%Y-%m-%dT%H:%M:%SZ'))
            else:
                querystring = None
            LOGGER.info('URL for Stream {}: {}{}'.format(
                stream_name, next_url,
                '?{}'.format(querystring) if querystring else ''))

            # API request data
            data = {}
            data = client.get(url=next_url,
                              path=path,
                              params=querystring,
                              endpoint=stream_name)

            # time_extracted: datetime when the data was extracted from the API
            time_extracted = utils.now()
            if not data or data is None or data == {}:
                total_records = 0
                break  # No data results

            # Get pagination details
            api_total = int(data.get('@total', '0'))
            page_size = int(data.get('@pagesize', '0'))
            if page_size:
                if page_size > limit:
                    limit = page_size
            next_page_uri = data.get('@nextpageuri', None)
            if next_page_uri:
                next_url = '{}{}'.format(BASE_URL, next_page_uri)
            else:
                next_url = None

            # Break out of loop if data contains only pagination details (no records)
            #   or if data_key is not in data
            # company_information and report_metadata do not have pagination details
            if stream_name not in ('company_information', 'report_metadata'):
                # catalog_items has a bug where api_total is always 0
                if stream_name != 'catalog_items' and api_total == 0 and not next_url:
                    break
            if data_key not in data:
                break

            # Transform data with transform_json from transform.py
            # The data_key identifies the array/list of records below the <root> element
            # LOGGER.info('data = {}'.format(data))  # TESTING, comment out
            transformed_data = []  # initialize the record list
            data_list = []
            data_dict = {}
            if isinstance(data, list) and data_key not in data:
                data_list = data
                data_dict[data_key] = data_list
                transformed_data = transform_json(data_dict, stream_name, data_key)
            elif isinstance(data, dict) and data_key not in data:
                data_list.append(data)
                data_dict[data_key] = data_list
                transformed_data = transform_json(data_dict, stream_name, data_key)
            else:
                transformed_data = transform_json(data, stream_name, data_key)
            # LOGGER.info('transformed_data = {}'.format(transformed_data))  # TESTING, comment out
            if not transformed_data or transformed_data is None:
                LOGGER.info('No transformed data for data = {}'.format(data))
                total_records = 0
                break  # No data results

            # Verify key id_fields are present
            for record in transformed_data:
                for key in id_fields:
                    if not record.get(key):
                        LOGGER.info('Stream: {}, Missing key {} in record: {}'.format(
                            stream_name, key, record))
                        raise RuntimeError

            # Process records and get the max_bookmark_value and record_count for the set of records
            max_bookmark_value, record_count = process_records(
                catalog=catalog,
                stream_name=stream_name,
                records=transformed_data,
                time_extracted=time_extracted,
                bookmark_field=bookmark_field,
                bookmark_type=bookmark_type,
                max_bookmark_value=max_bookmark_value,
                last_datetime=last_datetime,
                last_integer=last_integer,
                parent=parent,
                parent_id=parent_id)
            LOGGER.info('Stream {}, batch processed {} records'.format(
                stream_name, record_count))

            # Loop thru the parent batch of records for each child object (if selected)
            children = endpoint_config.get('children')
            if children:
                for child_stream_name, child_endpoint_config in children.items():
                    if child_stream_name in selected_streams:
                        write_schema(catalog, child_stream_name)
                        # For each parent record
                        for record in transformed_data:
                            i = 0
                            # Set parent_id
                            for id_field in id_fields:
                                if i == 0:
                                    parent_id_field = id_field
                                if id_field == 'id':
                                    parent_id_field = id_field
                                i = i + 1
                            parent_id = record.get(parent_id_field)

                            # sync_endpoint for child
                            LOGGER.info(
                                'START Sync for Stream: {}, parent_stream: {}, parent_id: {}'.format(
                                    child_stream_name, stream_name, parent_id))
                            child_path = child_endpoint_config.get(
                                'path', child_stream_name).format(str(parent_id))
                            child_bookmark_field = next(
                                iter(child_endpoint_config.get('replication_keys', [])), None)
                            child_total_records = sync_endpoint(
                                client=client,
                                catalog=catalog,
                                state=state,
                                start_date=start_date,
                                stream_name=child_stream_name,
                                path=child_path,
                                endpoint_config=child_endpoint_config,
                                static_params=child_endpoint_config.get('params', {}),
                                bookmark_query_field=child_endpoint_config.get('bookmark_query_field'),
                                bookmark_field=child_bookmark_field,
                                bookmark_type=child_endpoint_config.get('bookmark_type'),
                                data_key=child_endpoint_config.get('data_key', 'results'),
                                id_fields=child_endpoint_config.get('key_properties'),
                                selected_streams=selected_streams,
                                parent=child_endpoint_config.get('parent'),
                                parent_id=parent_id)
                            LOGGER.info(
                                'FINISHED Sync for Stream: {}, parent_id: {}, total_records: {}'.format(
                                    child_stream_name, parent_id, child_total_records))

            # Update the state with the max_bookmark_value for the stream
            if bookmark_field:
                write_bookmark(state, stream_name, max_bookmark_value)

            # Adjust total_records w/ record_count, if needed
            if record_count > total_records:
                total_records = total_records + record_count
            else:
                total_records = api_total

            # to_rec: to record; ending record for the batch page
            to_rec = offset + limit
            if to_rec > total_records:
                to_rec = total_records

            LOGGER.info('Synced Stream: {}, page: {}, {} to {} of total records: {}'.format(
                stream_name, page, offset, to_rec, total_records))

            # Pagination: increment the offset by the limit (batch-size) and page
            offset = offset + limit
            page = page + 1

        endpoint_total = endpoint_total + total_records

    # Return total_records (for all pages)
    return endpoint_total
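# Illustrative check of the query-string squashing above (not part of the tap): params are
# joined as key=value pairs and any '<parent_id>'/'<last_datetime>' placeholders carried in
# an endpoint's static params are substituted afterwards. 'publisherId' is a made-up
# parameter name for this example.
def _example_squash_params():
    params = {'PageSize': 1000, 'publisherId': '<parent_id>'}
    param_string = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()])
    querystring = param_string.replace('<parent_id>', str(12345))
    assert querystring == 'PageSize=1000&publisherId=12345'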
def unix_seconds_to_datetime(value):
    return strftime(
        datetime.datetime.fromtimestamp(int(value), datetime.timezone.utc))
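# Illustrative check (not part of the original module), assuming `strftime` above is
# singer-python's utils.strftime: an epoch in seconds maps to Singer's UTC string format.
# 1577836800 seconds == 2020-01-01T00:00:00Z.
def _example_unix_seconds_to_datetime():
    assert unix_seconds_to_datetime(1577836800) == '2020-01-01T00:00:00.000000Z'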
def _query_recur(
        self,
        query,
        catalog_entry,
        start_date_str,
        end_date=None,
        retries=MAX_RETRIES):
    params = {"q": query}
    url = "{}/services/data/v41.0/queryAll".format(self.sf.instance_url)
    headers = self.sf._get_standard_headers()

    if end_date is None:
        end_date = singer_utils.now()

    if retries == 0:
        raise TapSalesforceException(
            "Ran out of retries attempting to query Salesforce Object {}".format(
                catalog_entry['stream']))

    retryable = False
    try:
        while True:
            resp = self.sf._make_request('GET', url, headers=headers, params=params)
            resp_json = resp.json()

            for rec in resp_json.get('records'):
                yield rec

            next_records_url = resp_json.get('nextRecordsUrl')

            if next_records_url is None:
                break
            else:
                url = "{}{}".format(self.sf.instance_url, next_records_url)

    except HTTPError as ex:
        response = ex.response.json()
        if isinstance(response, list) and response[0].get("errorCode") == "QUERY_TIMEOUT":
            start_date = singer_utils.strptime_with_tz(start_date_str)
            day_range = (end_date - start_date).days
            LOGGER.info(
                "Salesforce returned QUERY_TIMEOUT querying %d days of %s",
                day_range,
                catalog_entry['stream'])
            retryable = True
        else:
            raise ex

    if retryable:
        start_date = singer_utils.strptime_with_tz(start_date_str)
        half_day_range = (end_date - start_date) // 2
        end_date = end_date - half_day_range

        if half_day_range.days == 0:
            raise TapSalesforceException(
                "Attempting to query by 0 day range, this would cause infinite looping.")

        query = self.sf._build_query_string(catalog_entry,
                                            singer_utils.strftime(start_date),
                                            singer_utils.strftime(end_date))
        for record in self._query_recur(
                query,
                catalog_entry,
                start_date_str,
                end_date,
                retries - 1):
            yield record
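# Illustrative check of the window-halving retry math above (not part of the tap): each
# QUERY_TIMEOUT retry halves the queried date range, and once the halved range rounds down
# to 0 days the code raises instead of looping forever.
def _example_query_window_halving():
    from datetime import datetime, timedelta
    start = datetime(2020, 1, 1)
    end = datetime(2020, 1, 9)
    assert (end - start) // 2 == timedelta(days=4)  # an 8-day window halves to 4 days
    assert (timedelta(days=1) // 2).days == 0       # a 1-day window hits the "0 day range" guard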
def unix_milliseconds_to_datetime(value):
    return strftime(
        datetime.datetime.fromtimestamp(
            float(value) / 1000.0, datetime.timezone.utc))
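# Illustrative check (not part of the original module), under the same assumption as the
# seconds-based helper: milliseconds are scaled to seconds before conversion.
# 1577836800500 ms == 2020-01-01T00:00:00.500Z.
def _example_unix_milliseconds_to_datetime():
    assert unix_milliseconds_to_datetime(1577836800500) == '2020-01-01T00:00:00.500000Z'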
def string_to_datetime(value):
    try:
        return strftime(strptime_to_utc(value))
    except Exception as ex:
        LOGGER.warning("%s, (%s)", ex, value)
        return None
def string_to_datetime(value):
    try:
        return strftime(pendulum.parse(value))
    except Exception:
        return None
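# Illustrative example (not part of the original module), assuming `strftime` here is
# singer-python's utils.strftime as in the other helpers: parseable strings are normalized
# to Singer's UTC format and unparseable values fall through to None.
def _example_string_to_datetime():
    assert string_to_datetime('2020-01-01T00:00:00Z') == '2020-01-01T00:00:00.000000Z'
    assert string_to_datetime('not a date') is None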
def sync_generic_basic_endpoint(sdk_client, stream, stream_metadata):
    discovered_schema = load_schema(stream)
    field_list = get_field_list(discovered_schema, stream, stream_metadata)

    discovered_schema['properties']['_sdc_customer_id'] = {
        'description': 'Profile ID',
        'type': 'string',
        'field': "customer_id"
    }
    primary_keys = GENERIC_ENDPOINT_MAPPINGS[stream]['primary_keys']
    write_schema(stream, discovered_schema, primary_keys)

    LOGGER.info("Syncing %s for customer %s", stream, sdk_client.client_customer_id)

    start_index = 0
    selector = {
        'fields': field_list,
        'paging': {
            'startIndex': str(start_index),
            'numberResults': str(PAGE_SIZE)
        }
    }

    while True:
        page = get_page(sdk_client, selector, stream, start_index)
        if page['totalNumEntries'] > GOOGLE_MAX_START_INDEX:
            raise Exception("Too many %s (%s > %s) for customer %s" % (
                stream,
                page['totalNumEntries'],
                GOOGLE_MAX_START_INDEX,
                sdk_client.client_customer_id))

        if 'entries' in page:
            with metrics.record_counter(stream) as counter:
                time_extracted = utils.now()

                with Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee:
                    for obj in page['entries']:
                        obj['_sdc_customer_id'] = sdk_client.client_customer_id
                        bumble_bee.pre_hook = transform_pre_hook

                        # At this point the `record` is wrong because of
                        # the comment below.
                        record = bumble_bee.transform(obj, discovered_schema)

                        # retransform `startDate` and `endDate` if this is
                        # campaigns as the transformer doesn't currently
                        # have support for dates
                        #
                        # This will cause a column split in the warehouse
                        # if we ever change to a `date` type so be wary.
                        if stream == 'campaigns':
                            # [API Docs][1] state that these fields are
                            # formatted as dates using `YYYYMMDD` and that
                            # when they're added they default to the
                            # timezone of the parent account. The
                            # description is not super clear so we're
                            # making some assumptions at this point: 1.
                            # The timezone of the parent account is the
                            # timezone that the date string is in and 2.
                            # There's no way to create a campaign in a
                            # different time zone (if there is, we don't
                            # appear to have a way to retrieve it).
                            #
                            # We grab the parent account's timezone from
                            # the ManagedCustomerService, cast these dates
                            # to that timezone, and then format them as if
                            # they were datetimes, which is not quite
                            # accurate but is the best we can currently do.
                            #
                            # [1]: https://developers.google.com/adwords/api/docs/reference/v201809/CampaignService.Campaign#startdate
                            parent_account_tz_str = get_and_cache_parent_account_tz_str(sdk_client)
                            if record.get('startDate'):
                                naive_date = datetime.datetime.strptime(obj['startDate'], '%Y%m%d')
                                utc_date = pytz.timezone(parent_account_tz_str) \
                                               .localize(naive_date) \
                                               .astimezone(tz=pytz.UTC)
                                record['startDate'] = utils.strftime(utc_date)

                            if record.get('endDate'):
                                naive_date = datetime.datetime.strptime(obj['endDate'], '%Y%m%d')
                                utc_date = pytz.timezone(parent_account_tz_str) \
                                               .localize(naive_date) \
                                               .astimezone(tz=pytz.UTC)
                                record['endDate'] = utils.strftime(utc_date)

                        singer.write_record(stream, record, time_extracted=time_extracted)
                        counter.increment()

        start_index += PAGE_SIZE
        if start_index > int(page['totalNumEntries']):
            break

    LOGGER.info("Done syncing %s for customer_id %s", stream, sdk_client.client_customer_id)
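# Illustrative check of the campaign date re-serialization above (not part of the tap): an
# AdWords YYYYMMDD date is localized to the parent account's timezone ('America/New_York'
# is a made-up example) and cast to UTC before being formatted with singer-python's strftime.
def _example_campaign_date_to_utc():
    naive_date = datetime.datetime.strptime('20200101', '%Y%m%d')
    utc_date = pytz.timezone('America/New_York').localize(naive_date).astimezone(tz=pytz.UTC)
    assert utils.strftime(utc_date) == '2020-01-01T05:00:00.000000Z'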
}, "ad_groups": { 'primary_keys': ["id"], 'service_name': 'AdGroupService' }, "ads": { 'primary_keys': ["adGroupId"], 'service_name': 'AdGroupAdService' }, "accounts": { 'primary_keys': ["customerId"], 'service_name': 'ManagedCustomerService' } } REPORT_RUN_DATETIME = utils.strftime(utils.now()) VERIFIED_REPORTS = frozenset([ 'ACCOUNT_PERFORMANCE_REPORT', 'ADGROUP_PERFORMANCE_REPORT', # 'AD_CUSTOMIZERS_FEED_ITEM_REPORT', 'AD_PERFORMANCE_REPORT', 'AGE_RANGE_PERFORMANCE_REPORT', 'AUDIENCE_PERFORMANCE_REPORT', # 'AUTOMATIC_PLACEMENTS_PERFORMANCE_REPORT', # 'BID_GOAL_PERFORMANCE_REPORT', #'BUDGET_PERFORMANCE_REPORT', -- does NOT allow for querying by date range 'CALL_METRICS_CALL_DETAILS_REPORT', #'CAMPAIGN_AD_SCHEDULE_TARGET_REPORT', #'CAMPAIGN_CRITERIA_REPORT', #'CAMPAIGN_GROUP_PERFORMANCE_REPORT',