def rows_to_table(auth, project_id, dataset_id, table_id, rows, schema=[], skip_rows=1, disposition='WRITE_TRUNCATE', wait=True):
  if project.verbose: print('BIGQUERY ROWS TO TABLE: ', project_id, dataset_id, table_id)

  buffer_data = BytesIO()
  buffer_writer = codecs.getwriter('utf-8')
  writer = csv.writer(buffer_writer(buffer_data), delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
  has_rows = False

  if rows == []:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition, wait)

  for is_last, row in flag_last(rows):

    # write row to csv buffer
    writer.writerow(row)

    # write the buffer in chunks
    if is_last or buffer_data.tell() + 1 > BIGQUERY_BUFFERSIZE:
      if project.verbose: print('BigQuery Buffer Size', buffer_data.tell())
      buffer_data.seek(0) # reset for read
      io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition)

      # reset buffer for next loop, be sure to do an append to the table
      buffer_data.seek(0) # reset for write
      buffer_data.truncate() # reset for write ( yes its needed for EOF marker )
      disposition = 'WRITE_APPEND' # append all remaining records
      skip_rows = 0

    has_rows = True

  # if no rows, clear table to simulate empty write
  if not has_rows:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'CSV', schema, skip_rows, disposition, wait)
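
# Illustrative sketch only, not part of the library: shows how a small in-memory
# list of rows, including a header line, could be pushed to a table. The
# 'example-project' / 'example_dataset' / 'example_table' names are hypothetical,
# and 'service' assumes a service-account credential is already configured.
def _example_rows_to_table():
  example_rows = [
    ['Date', 'Campaign', 'Spend'],        # header row, dropped by skip_rows=1
    ['2020-01-01', 'Campaign A', 100.0],
    ['2020-01-02', 'Campaign B', 250.0],
  ]
  rows_to_table('service', 'example-project', 'example_dataset', 'example_table', example_rows, skip_rows=1)
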
def json_to_table(auth, project_id, dataset_id, table_id, json_data, schema=None, disposition='WRITE_TRUNCATE', wait=True):
  if project.verbose: print('BIGQUERY JSON TO TABLE: ', project_id, dataset_id, table_id)

  buffer_data = BytesIO()
  has_rows = False

  for is_last, record in flag_last(json_data):

    # check if json is already string encoded, and write to buffer
    buffer_data.write((record if isinstance(record, str) else json.dumps(record)).encode('utf-8'))

    # write the buffer in chunks
    if is_last or buffer_data.tell() + 1 > BIGQUERY_BUFFERSIZE:
      if project.verbose: print('BigQuery Buffer Size', buffer_data.tell())
      buffer_data.seek(0) # reset for read
      io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'NEWLINE_DELIMITED_JSON', schema, 0, disposition)

      # reset buffer for next loop, be sure to do an append to the table
      buffer_data.seek(0) # reset for write
      buffer_data.truncate() # reset for write ( yes its needed for EOF marker )
      disposition = 'WRITE_APPEND' # append all remaining records
      has_rows = True

    # if not end append newline, for newline delimited json
    else:
      buffer_data.write('\n'.encode('utf-8'))

  # if no rows, clear table to simulate empty write
  if not has_rows:
    if project.verbose: print('BigQuery Zero Rows')
    return io_to_table(auth, project_id, dataset_id, table_id, buffer_data, 'NEWLINE_DELIMITED_JSON', schema, 0, disposition, wait)
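
# Illustrative sketch only, not part of the library: json_to_table expects an
# iterable of records, each either a dict or an already-serialized JSON string,
# which it streams as newline-delimited JSON. All names below are hypothetical.
def _example_json_to_table():
  example_records = [
    {'date': '2020-01-01', 'campaign': 'Campaign A', 'spend': 100.0},
    {'date': '2020-01-02', 'campaign': 'Campaign B', 'spend': 250.0},
  ]
  json_to_table('service', 'example-project', 'example_dataset', 'example_table', example_records)
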
def conversions_upload(auth, account, floodlight_activity_id, conversion_type, conversion_rows, encryption_entity=None, update=False):
  """ Uploads an offline conversion list to DCM.

  BulletProofing: https://developers.google.com/doubleclick-advertisers/guides/conversions_upload

  Handles errors and segmentation of conversions so the list can be any size.

  Args:
    * auth: (string) Either user or service.
    * account: (string) [account:advertiser@profile] token.
    * floodlight_activity_id: (int) ID of DCM floodlight to upload conversions to.
    * conversion_type: (string) One of the following: encryptedUserId, encryptedUserIdCandidates, gclid, mobileDeviceId.
    * conversion_rows: (iterator) List of the following rows: Ordinal, timestampMicros, encryptedUserId | encryptedUserIdCandidates | gclid | mobileDeviceId.
    * encryption_entity: (object) See EncryptionInfo docs: https://developers.google.com/doubleclick-advertisers/v3.2/conversions/batchinsert#encryptionInfo
    * update: (boolean) If True, calls batchupdate instead of batchinsert.
  """

  account_id, advertiser_id = parse_account(auth, account)

  is_superuser, profile_id = get_profile_for_api(auth, account_id)
  kwargs = { 'profileId':profile_id, 'accountId':account_id } if is_superuser else { 'profileId':profile_id }
  kwargs['id'] = floodlight_activity_id
  response = API_DCM(auth, internal=is_superuser).floodlightActivities().get(**kwargs).execute()

  # upload in batch sizes of DCM_CONVERSION_SIZE
  row_count = 0
  row_buffer = []

  for is_last, row in flag_last(conversion_rows):
    row_buffer.append(row)

    if is_last or len(row_buffer) == DCM_CONVERSION_SIZE:

      if project.verbose: print('CONVERSION UPLOADING ROWS: %d - %d' % (row_count, row_count + len(row_buffer)))

      body = {
        'conversions': [{
          'floodlightActivityId': floodlight_activity_id,
          'floodlightConfigurationId': response['floodlightConfigurationId'],
          'ordinal': row[0],
          'timestampMicros': row[1],
          'quantity': 1,
          'value': 0.0,
          conversion_type: row[2],
        } for row in row_buffer]
      }

      if encryption_entity: body['encryptionInfo'] = encryption_entity

      kwargs = { 'profileId':profile_id, 'accountId':account_id } if is_superuser else { 'profileId':profile_id }
      kwargs['body'] = body

      if update:
        results = API_DCM(auth, internal=is_superuser).conversions().batchupdate(**kwargs).execute()
      else:
        results = API_DCM(auth, internal=is_superuser).conversions().batchinsert(**kwargs).execute()

      # stream back status
      for status in results['status']:
        yield status

      # clear the buffer
      row_count += len(row_buffer)
      row_buffer = []
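
# Illustrative sketch only, not part of the library: conversions_upload is a
# generator, so uploads only happen as the caller iterates the yielded statuses.
# The account token, floodlight id, and gclid values below are hypothetical, and
# the shape of each status is whatever the DCM batchinsert/batchupdate response
# returns for its 'status' list.
def _example_conversions_upload():
  example_rows = [
    ['order-1001', 1577836800000000, 'EXAMPLE_GCLID_1'],  # ordinal, timestampMicros, gclid
    ['order-1002', 1577923200000000, 'EXAMPLE_GCLID_2'],
  ]
  for status in conversions_upload('user', '1234:5678@90', 9999999, 'gclid', example_rows):
    # surface any per-conversion errors reported back by the API
    if status.get('errors'):
      print('CONVERSION ERROR:', status['errors'])
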