def __getreportdataraw(self, report_id, file_id, chunk_size):
  """Download raw DCM report data, waiting until the file is available."""
  request = self.__api.files().get(reportId=report_id, fileId=file_id)
  result = APIRequest(request).execute()
  et = 0
  retry_attempts = 0
  max_wait_time = 500
  while True:
    if result['status'] == 'REPORT_AVAILABLE':
      request = self.__api.files().get_media(reportId=report_id,
                                             fileId=file_id)
      data = StringIO()
      downloader = httpMediaHandler.MediaIoBaseDownload(
          data, request, chunksize=chunk_size)
      done = False
      while done is False:
        unused_status, done = downloader.next_chunk(num_retries=4)
      data.seek(0)
      return data
    wait_time = min(max_wait_time, 2**retry_attempts)
    retry_attempts += 1
    time.sleep(wait_time)
    et += wait_time
    if et >= DCMConnector._DCM_TIMEOUT:
      raise DCMAPITimeOut('DCM API Request Timeout (files.get())')
    request = self.__api.files().get(reportId=report_id, fileId=file_id)
    result = APIRequest(request).execute()
def __getreportdataraw(self, report_id):
  """Download raw DS report data, waiting until the report is ready."""
  request = self.__api.reports().get(reportId=report_id)
  result = APIRequest(request).execute()
  et = 0
  retry_attempts = 0
  max_wait_time = 500
  while True:
    if result['isReportReady']:
      request = self.__api.reports().getFile(reportId=report_id,
                                             reportFragment=0)
      data = StringIO()
      downloader = httpMediaHandler.MediaIoBaseDownload(
          data, request, chunksize=2**20 * 20)  # 20MB chunks
      done = False
      while done is False:
        unused_status, done = downloader.next_chunk()
      data.seek(0)
      return data
    wait_time = min(max_wait_time, 2**retry_attempts)
    retry_attempts += 1
    time.sleep(wait_time)
    et += wait_time
    if et >= DSConnector._DS_TIMEOUT:
      raise DSAPITimeOut('DS API Request Timeout (reports.get())')
    request = self.__api.reports().get(reportId=report_id)
    result = APIRequest(request).execute()
def createquery(self, body):
  """Create new query.

  This method creates a new query (report).

  Args:
    body: JSON object describing report. See 'Queries resource' on DBM API
      docs (https://developers.google.com/bid-manager/v1/queries#resource)

  Returns:
    ID of the newly created query
  """
  request = self.__api.queries().createquery(body=body)
  result = APIRequest(request).execute()
  return result.get('queryId')
def gct_createtask(self, queue_name, payload, tag=None):
  """Insert new task into the Cloud Task queue.

  Args:
    queue_name: Cloud Task queue name
    payload: Task payload
    tag: Tag for the task

  Returns:
    Task resource for the newly created task
  """
  parent = ('projects/' + self.project_id + '/locations/' +
            self.gae_location + '/queues/' + queue_name)
  body = {
      'responseView': 'FULL',
      'task': {
          'pullMessage': {
              'payload': payload
          }
      }
  }
  if tag:
    body['task']['pullMessage']['tag'] = tag
  request = self.__gctapi.projects().locations().queues().tasks().create(
      parent=parent, body=body)
  response = APIRequest(request).execute()
  return response
def gct_listtasks(self, queue_name):
  """List tasks in the specified Cloud Task queue.

  Args:
    queue_name: Cloud Task queue name

  Returns:
    Response with the tasks in the queue
  """
  parent = ('projects/' + self.project_id + '/locations/' +
            self.gae_location + '/queues/' + queue_name)
  request = self.__gctapi.projects().locations().queues().tasks().list(
      parent=parent)
  response = APIRequest(request).execute()
  return response
def sdfdownloadadgroup(self, advertiser_id, sanitize_rows=True):
  """Download Ad Groups in SDF format.

  Args:
    advertiser_id: DBM advertiser ID
    sanitize_rows: Whether to remove commas, quotes and new lines from each
      row

  Returns:
    List with rows, one per Ad Group
  """
  body = {
      'fileTypes': ['AD_GROUP'],
      'filterType': 'ADVERTISER_ID',
      'filterIds': []
  }
  body['filterIds'].append(advertiser_id)
  request = self.__api.sdf().download(body=body)
  sdfdata = APIRequest(request).execute()
  data = list()
  dataio = TextUtils.toascii(sdfdata['adGroups'])
  if dataio:
    reader = csv.reader(StringIO(dataio))
    for row in reader:
      if not row:
        break
      temp_row = row
      if sanitize_rows:
        temp_row = TextUtils.removecommas(temp_row)
        temp_row = TextUtils.removequotes(temp_row)
        temp_row = TextUtils.removenewlines(temp_row)
      data.append(temp_row)
  return data
def gct_acknowledgetask(self, task_name, schedule_time):
  """Acknowledge a leased Cloud Task.

  Args:
    task_name: Fully qualified name of the task
    schedule_time: scheduleTime returned when the task was leased

  Returns:
    Response of the acknowledge request
  """
  ack_body = {'scheduleTime': schedule_time}
  request = self.__gctapi.projects().locations().queues().tasks().acknowledge(
      name=task_name, body=ack_body)
  response = APIRequest(request).execute()
  return response
def gce_deleteinstance(self, name, zone=None):
  """Delete a GCE instance.

  Args:
    name: Instance name
    zone: Zone of the instance. If not provided, self.gce_zone is used

  Returns:
    Result of the delete operation
  """
  if not zone:
    assert self.gce_zone
  else:
    self.gce_zone = zone
  request = self.__gceapi.instances().delete(
      project=self.project_id, zone=self.gce_zone, instance=name)
  return APIRequest(request).execute()
def bq_deletetable(self, dataset, table):
  """Delete BigQuery table.

  Args:
    dataset: BigQuery dataset ID
    table: BigQuery table ID
  """
  request = self.__gbqapi.tables().delete(
      projectId=self.project_id, datasetId=dataset, tableId=table)
  APIRequest(request).execute()
def deletequery(self, query_id):
  """Delete query.

  Remove the query with the specified ID from the system.

  Args:
    query_id: ID of the query to be deleted
  """
  request = self.__api.queries().deletequery(queryId=query_id)
  APIRequest(request).execute()
def deletereport(self, report_id):
  """Delete a report.

  Delete a report with the given ID.

  Args:
    report_id: Report ID.
  """
  request = self.__api.reports().delete(profileId=self.__profile_id,
                                        reportId=report_id)
  APIRequest(request).execute()
def gce_configinstance(self, name, zone, machine_type, service_account,
                       scopes):
  """Create configuration for GCE instance.

  Args:
    name: Instance name
    zone: Zone (e.g.: us-central1-f)
    machine_type: Machine type (e.g.: n1-standard-1)
    service_account: Email of service account authorized for this instance
    scopes: Authorization scopes for service account

  Returns:
    Dict object with instance resource (see
    https://cloud.google.com/compute/docs/reference/latest/instances#resource)
  """
  self.gce_zone = zone
  request = self.__gceapi.images().getFromFamily(
      project='debian-cloud', family='debian-9')
  image_response = APIRequest(request).execute()
  source_disk_image = image_response['selfLink']
  config = {
      'name': name,
      'description': 'Auto created instance',
      'machineType': 'zones/%s/machineTypes/%s' % (self.gce_zone,
                                                   machine_type),
      'networkInterfaces': [{
          'network': 'global/networks/default',
          'accessConfigs': [{
              'type': 'ONE_TO_ONE_NAT',
              'name': 'External NAT'
          }]
      }],
      'disks': [{
          'boot': True,
          'autoDelete': True,
          'initializeParams': {
              'sourceImage': source_disk_image,
          }
      }],
      'metadata': {
          'items': []
      },
      'serviceAccounts': [{
          'email': service_account,
          'scopes': scopes
      }]
  }
  return config
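# Illustrative usage sketch for gce_configinstance. The connector instance
# name `gcp` and the idea of passing the returned dict to the Compute API's
# instances().insert() call are assumptions for illustration only; this module
# may expose its own create method for that step.
#
#   config = gcp.gce_configinstance(
#       name='worker-1', zone='us-central1-f', machine_type='n1-standard-1',
#       service_account='sa@my-project.iam.gserviceaccount.com',
#       scopes=['https://www.googleapis.com/auth/cloud-platform'])
#   # The returned dict is a full instance resource body; once an instance
#   # creation operation is started with it, gce_waitforoperation() below can
#   # block until the operation completes.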
def getadvertisers(self):
  """Get list of advertisers.

  Get advertisers from DCM API.

  Returns:
    List of advertisers resource (see
    https://developers.google.com/doubleclick-advertisers/v2.8/advertisers#resource)
  """
  request = self.__api.advertisers().list(profileId=self.__profile_id)
  result = APIRequest(request).execute()
  return result['advertisers']
def bq_importfromgcs(self, gcsuri, dataset, table, schema, encoding,
                     writemode='WRITE_TRUNCATE'):
  """Import CSV in CloudStorage into BigQuery table.

  Args:
    gcsuri: URI of the CloudStorage file
    dataset: Target BigQuery dataset
    table: Target BigQuery table
    schema: Schema for the new BigQuery table
    encoding: Encoding of the file in CloudStorage
    writemode: Write mode for the new table

  Returns:
    BigQuery's import job ID
  """
  if isinstance(gcsuri, list):
    source_uris = gcsuri
  else:
    source_uris = [gcsuri]
  job_id = str(uuid.uuid4())
  job_data = {
      'jobReference': {
          'projectId': self.project_id,
          'jobId': job_id
      },
      'configuration': {
          'load': {
              'sourceUris': source_uris,
              'schema': schema,
              'destinationTable': {
                  'projectId': self.project_id,
                  'datasetId': dataset,
                  'tableId': table
              },
              'skipLeadingRows': 1,
              'writeDisposition': writemode,
              'fieldDelimiter': ',',
              'encoding': encoding,
              'allowLargeResults': True
          }
      }
  }
  request = self.__gbqapi.jobs().insert(
      projectId=self.project_id, body=job_data)
  APIRequest(request).execute()
  return job_id
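# Minimal usage sketch for bq_importfromgcs. The connector instance name
# `gcp`, the bucket/file/dataset/table names and the example schema are
# hypothetical; the schema dict follows the standard BigQuery load-job schema
# format.
#
#   schema = {'fields': [
#       {'name': 'campaign_id', 'type': 'STRING'},
#       {'name': 'impressions', 'type': 'INTEGER'},
#   ]}
#   job_id = gcp.bq_importfromgcs(
#       gcsuri='gs://my-bucket/report.csv', dataset='my_dataset',
#       table='my_table', schema=schema, encoding='UTF-8')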
def gce_waitforoperation(self, opname):
  """Wait for a GCE zone operation to complete.

  Args:
    opname: Name of the operation to wait for

  Returns:
    Operation resource once its status is DONE

  Raises:
    Exception: If the operation reports an error or does not finish in time
  """
  assert self.gce_zone
  retries_left = 120
  while retries_left > 0:
    request = self.__gceapi.zoneOperations().get(
        project=self.project_id, zone=self.gce_zone, operation=opname)
    result = APIRequest(request).execute()
    if result['status'] == 'DONE':
      if 'error' in result:
        raise Exception(result['error'])
      return result
    time.sleep(1)
    retries_left -= 1
  raise Exception('Timeout')
def runreport(self, report_id):
  """Run a report.

  Execute the report with the provided ID.

  Args:
    report_id: Report ID

  Returns:
    File ID for this report execution
  """
  request = self.__api.reports().run(profileId=self.__profile_id,
                                     reportId=report_id)
  result = APIRequest(request).execute()
  return result['id']
def getcreatives(self, campaign_id):
  """Get list of creatives.

  Get list of creatives from DCM API.

  Args:
    campaign_id: Campaign ID. Only creatives under this campaign will be
      returned

  Returns:
    List of creatives resource (see
    https://developers.google.com/doubleclick-advertisers/v2.8/creatives)
  """
  request = self.__api.creatives().list(profileId=self.__profile_id,
                                        campaignId=campaign_id)
  result = APIRequest(request).execute()
  return result['creatives']
def createreport(self, report_obj):
  """Creates a new report.

  Creates a new DCM report.

  Args:
    report_obj: Report resource (see
      https://developers.google.com/doubleclick-advertisers/v2.8/reports#resource)

  Returns:
    ID of the newly created report
  """
  assert isinstance(report_obj, DCMReport)
  request = self.__api.reports().insert(profileId=self.__profile_id,
                                        body=report_obj.getbody())
  result = APIRequest(request).execute()
  return result['id']
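# Sketch of the typical DCM create/run flow with this connector, assuming a
# configured DCMConnector instance named `dcm` and a DCMReport instance named
# `report` built elsewhere (both names are illustrative):
#
#   report_id = dcm.createreport(report)
#   file_id = dcm.runreport(report_id)
#   # The (report_id, file_id) pair identifies the report run whose raw data
#   # is later fetched by the private __getreportdataraw helper.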
def createreport(self, report_obj):
  """Create a new report.

  Creates a new report using DS API. Creation of a report triggers its
  execution.

  Args:
    report_obj: Report descriptor (see
      https://developers.google.com/doubleclick-search/v2/reference/reports#resource-representations)

  Returns:
    ID of the newly created report
  """
  assert isinstance(report_obj, DSReport)
  request = self.__api.reports().request(body=report_obj.getbody())
  result = APIRequest(request).execute()
  report_id = result['id']
  return report_id
def bq_copytable(self, source_dataset, source_table, dest_dataset, dest_table,
                 writemode='WRITE_TRUNCATE'):
  """Copy table from one BigQuery dataset to another.

  Args:
    source_dataset: Source BigQuery dataset
    source_table: Source BigQuery table
    dest_dataset: Destination BigQuery dataset
    dest_table: Destination BigQuery table
    writemode: Write mode for the destination BigQuery table

  Returns:
    BigQuery's job ID
  """
  job_id = str(uuid.uuid4())
  job_data = {
      'jobReference': {
          'projectId': self.project_id,
          'jobId': job_id
      },
      'configuration': {
          'copy': {
              'sourceTable': {
                  'projectId': self.project_id,
                  'datasetId': source_dataset,
                  'tableId': source_table
              },
              'destinationTable': {
                  'projectId': self.project_id,
                  'datasetId': dest_dataset,
                  'tableId': dest_table
              },
              'writeDisposition': writemode,
          }
      }
  }
  request = self.__gbqapi.jobs().insert(
      projectId=self.project_id, body=job_data)
  APIRequest(request).execute()
  return job_id
def bq_inserttable(self, dataset, table, schema):
  """Create new BigQuery table.

  Args:
    dataset: BigQuery dataset ID
    table: BigQuery table ID
    schema: Schema descriptor
  """
  body = {
      'tableReference': {
          'projectId': self.project_id,
          'datasetId': dataset,
          'tableId': table
      },
      'schema': schema,
  }
  request = self.__gbqapi.tables().insert(
      projectId=self.project_id, datasetId=dataset, body=body)
  APIRequest(request).execute()
def bq_query(self, dataset, query, format_as_dict=None):
  """Execute query on BigQuery.

  Args:
    dataset: BigQuery dataset ID
    query: Query to execute
    format_as_dict: Whether output should be returned as a dict or not

  Returns:
    Query result. If format_as_dict is true, it returns a dict with one key
    per value of the first field (as many keys as rows in the result set).
    For each key, the value is another dict holding the values of the
    remaining fields for that row, indexed by field name. If format_as_dict
    is false, the result is a list of lists: the outer list contains rows
    (with a header row first) and each inner list contains that row's column
    values.
  """
  body = {'query': query, 'defaultDataset': {'datasetId': dataset}}
  request = self.__gbqapi.jobs().query(projectId=self.project_id, body=body)
  result = APIRequest(request).execute()
  fields = result['schema']['fields']
  list_data = list()
  dict_data = dict()
  if 'rows' in result:
    headers = list()
    for f in fields:
      headers.append(f['name'])
    list_data.append(headers)
    for record in result['rows']:
      row = list()
      for idx in range(len(fields)):
        row.append(record['f'][idx]['v'])
        if idx == 0:
          # Assume first field of the query is the key
          dict_data[record['f'][0]['v']] = dict()
        else:
          dict_data[record['f'][0]['v']][fields[idx]['name']] = (
              record['f'][idx]['v'])
      list_data.append(row)
  if format_as_dict:
    return dict_data
  else:
    return list_data
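# Example of the two return shapes of bq_query, assuming a connector instance
# named `gcp` and a table with columns (campaign, clicks); all names and
# values below are illustrative only. Note that the BigQuery JSON API returns
# cell values as strings.
#
#   rows = gcp.bq_query('my_dataset', 'SELECT campaign, clicks FROM t')
#   # rows -> [['campaign', 'clicks'], ['c1', '10'], ['c2', '7']]
#   by_campaign = gcp.bq_query('my_dataset',
#                              'SELECT campaign, clicks FROM t',
#                              format_as_dict=True)
#   # by_campaign -> {'c1': {'clicks': '10'}, 'c2': {'clicks': '7'}}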
def gcs_uploadtable(self, gcptable, bucket, filename):
  """Upload table to Cloud Storage as a CSV.

  Args:
    gcptable: GCPTable instance
    bucket: Target Cloud Storage bucket
    filename: Target filename inside the Cloud Storage bucket

  Returns:
    URI of the newly created file
  """
  assert isinstance(gcptable, GCPTable)
  fh = io.BytesIO()
  for row in gcptable.tostrrows():
    fh.writelines(row + '\n')
  media = httpMediaHandler.MediaIoBaseUpload(fh, 'text/csv', resumable=True)
  request = self.__gcsapi.objects().insert(
      bucket=bucket, name=filename, media_body=media)
  APIRequest(request).execute()
  gcsuri = 'gs://{bucket}/{name}'.format(bucket=bucket, name=filename)
  return gcsuri
def bq_gettables(self, dataset):
  """Get BigQuery tables for a given dataset.

  Args:
    dataset: BigQuery dataset ID

  Returns:
    List of bigquery#table resources. See (
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-representations)
  """
  table_list = list()
  tables = self.__gbqapi.tables()
  request = tables.list(projectId=self.project_id, datasetId=dataset)
  while request is not None:
    response = APIRequest(request).execute()
    if (response['totalItems'] > 0) and ('tables' in response):
      for table in response['tables']:
        table_list.append(table)
    request = tables.list_next(
        previous_request=request, previous_response=response)
  return table_list
def bq_readtable(self, dataset, table):
  """Read BigQuery table and return data.

  Args:
    dataset: Dataset ID
    table: Table ID

  Returns:
    Table data. Returned value is a list. Each element is a row. Each row is
    a list containing as many elements as columns.
  """
  request = self.__gbqapi.tabledata().list(
      projectId=self.project_id, datasetId=dataset, tableId=table)
  result = APIRequest(request).execute()
  data = list()
  if 'rows' in result:
    for row in result['rows']:
      values = list()
      for item in row['f']:
        values.append(item['v'])
      data.append(values)
  return data
def gct_leasetask(self, queue_name, lease_secs, num_tasks=1):
  """Lease first available task(s) from the specified Cloud Task queue.

  Args:
    queue_name: Cloud Task queue name
    lease_secs: Duration of the lease in seconds
    num_tasks: Number of tasks to lease

  Returns:
    Resource of the leased task(s)
  """
  parent = ('projects/' + self.project_id + '/locations/' +
            self.gae_location + '/queues/' + queue_name)
  duration = str(lease_secs) + 's'
  lease_body = {
      'maxTasks': num_tasks,
      'responseView': 'FULL',
      'leaseDuration': duration
  }
  request = self.__gctapi.projects().locations().queues().tasks().lease(
      parent=parent, body=lease_body)
  response = APIRequest(request).execute()
  return response
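# Sketch of the pull-queue lease/acknowledge cycle using the gct_* helpers,
# assuming a connector instance named `gcp` and a queue named 'my-queue'
# (both illustrative). The payload handling is only indicated, not
# implemented.
#
#   response = gcp.gct_leasetask('my-queue', lease_secs=300)
#   for task in response.get('tasks', []):
#     # ... process task['pullMessage']['payload'] here ...
#     gcp.gct_acknowledgetask(task['name'], task['scheduleTime'])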
def __getquerydataraw(self, query_id):
  """Get query data, wait if necessary.

  Retrieve query data. Waits for the data to be ready and then downloads the
  data directly.

  Args:
    query_id: ID of the query

  Returns:
    File-like object with the raw report data

  Raises:
    DBMAPITimeOut: If report is not ready before DBMConnector._DBM_TIMEOUT
      seconds
    DBMAPIQueryFailed: If report is ready but doesn't contain any files
  """
  et = 0
  retry_attempts = 0
  max_wait_time = 500
  while True:
    request = self.__api.queries().getquery(queryId=query_id)
    result = APIRequest(request).execute()
    if 'googleCloudStoragePathForLatestReport' in result['metadata']:
      if len(result['metadata']['googleCloudStoragePathForLatestReport']) > 1:
        break
    wait_time = min(max_wait_time, 2**retry_attempts)
    retry_attempts += 1
    time.sleep(wait_time)
    et += wait_time
    if et > DBMConnector._DBM_TIMEOUT:
      raise DBMAPITimeOut(
          'DBM API Request Timeout (getquery) - Query ID: %s' % query_id)
  if 'googleCloudStoragePathForLatestReport' in result['metadata']:
    report_url = result['metadata']['googleCloudStoragePathForLatestReport']
    data = urllib2.urlopen(report_url).read()
    return StringIO(data)
  else:
    raise DBMAPIQueryFailed('DBM Query Failed - Query ID: %s' % query_id)
def bq_insertdata(self, gcptable, dataset, table):
  """Insert GCPTable data into BigQuery table.

  Args:
    gcptable: GCPTable instance
    dataset: BigQuery dataset ID
    table: BigQuery table ID

  Returns:
    bigquery#tableDataInsertAllResponse resource (see
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll#response)
  """
  assert isinstance(gcptable, GCPTable)
  body = {'skipInvalidRows': True, 'ignoreUnknownValues': True, 'rows': []}
  fields = gcptable.getfields()
  for row in gcptable.table:  # Access raw data
    json_row = dict()
    for i in range(len(fields)):
      json_row[fields[i]] = row[i]
    body['rows'].append({'json': json_row})
  request = self.__gbqapi.tabledata().insertAll(
      projectId=self.project_id, datasetId=dataset, tableId=table, body=body)
  result = APIRequest(request).execute()
  return result
def runquery(self, query_id, data_range=None, start_date=None, end_date=None):
  """Run a specified query.

  This method runs the specified query (report). You can optionally specify
  date ranges.

  Args:
    query_id: ID of the query to run
    data_range: Range for report as expected in metadata.dataRange field (see
      https://developers.google.com/bid-manager/v1/queries#resource)
    start_date: Only applicable if data_range is CUSTOM_DATES
    end_date: Only applicable if data_range is CUSTOM_DATES
  """
  body = {}
  if data_range:
    body['dataRange'] = data_range
  if start_date:
    assert isinstance(start_date, datetime.datetime)
    assert isinstance(end_date, datetime.datetime)
    body['reportDataStartTimeMs'] = time.mktime(
        start_date.timetuple()) * 1000
    body['reportDataEndTimeMs'] = time.mktime(end_date.timetuple()) * 1000
  request = self.__api.queries().runquery(queryId=query_id, body=body)
  APIRequest(request).execute()
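# Sketch of the DBM query lifecycle with these methods, assuming a configured
# DBMConnector instance named `dbm` and a prepared query body `body` (names
# are illustrative):
#
#   query_id = dbm.createquery(body)
#   dbm.runquery(query_id, data_range='LAST_30_DAYS')
#   # Once the report is ready, the private __getquerydataraw helper fetches
#   # the data from the googleCloudStoragePathForLatestReport URL.
#   dbm.deletequery(query_id)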
def gct_gettask(self, task_name):
  """Get a task from a Cloud Task queue.

  Args:
    task_name: Fully qualified name of the task

  Returns:
    Task resource
  """
  request = self.__gctapi.projects().locations().queues().tasks().get(
      name=task_name, responseView='FULL')
  response = APIRequest(request).execute()
  return response