def _insert_copy_job(self,
                     project_id,
                     job_id,
                     from_table_reference,
                     to_table_reference,
                     create_disposition=None,
                     write_disposition=None):
    """Submit a table-copy job and return the job reference from the response.

    Args:
        project_id: Project used for both the insert request and the job
            reference.
        job_id: Id assigned to the job reference.
        from_table_reference: Passed as the copy configuration's sourceTable.
        to_table_reference: Passed as the copy configuration's
            destinationTable.
        create_disposition: Forwarded unchanged as createDisposition.
        write_disposition: Forwarded unchanged as writeDisposition.

    Returns:
        The ``jobReference`` attribute of the response returned by
        ``self.client.jobs.Insert``.
    """
    job_ref = bigquery.JobReference(jobId=job_id, projectId=project_id)

    copy_config = bigquery.JobConfigurationTableCopy(
        destinationTable=to_table_reference,
        sourceTable=from_table_reference,
        createDisposition=create_disposition,
        writeDisposition=write_disposition,
    )
    copy_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(copy=copy_config),
        jobReference=job_ref,
    )
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=copy_job,
    )

    # Both the outgoing request and the incoming response are logged.
    logging.info("Inserting job request: %s", insert_request)
    insert_response = self.client.jobs.Insert(insert_request)
    logging.info("Response was %s", insert_response)
    return insert_response.jobReference
def _insert_load_job(self,
                     project_id,
                     job_id,
                     table_reference,
                     source_uris,
                     schema=None,
                     write_disposition=None,
                     create_disposition=None,
                     additional_load_parameters=None):
    """Submit a newline-delimited-JSON load job and return its job reference.

    Args:
        project_id: Project used for both the insert request and the job
            reference.
        job_id: Id assigned to the job reference.
        table_reference: Passed as the load configuration's destinationTable.
        source_uris: Passed as the load configuration's sourceUris.
        schema: Table schema, or the string ``'SCHEMA_AUTODETECT'``. In the
            latter case no schema is sent and ``autodetect`` is set to True.
        write_disposition: Forwarded unchanged as writeDisposition.
        create_disposition: Forwarded unchanged as createDisposition.
        additional_load_parameters: Optional mapping expanded as extra keyword
            arguments to ``JobConfigurationLoad``; a falsy value is treated as
            an empty mapping.

    Returns:
        The ``jobReference`` attribute of the response returned by
        ``self.client.jobs.Insert``.
    """
    extra_load_kwargs = additional_load_parameters or {}

    # The sentinel string replaces an explicit schema with autodetection.
    use_autodetect = schema == 'SCHEMA_AUTODETECT'
    effective_schema = None if use_autodetect else schema

    job_ref = bigquery.JobReference(jobId=job_id, projectId=project_id)
    load_config = bigquery.JobConfigurationLoad(
        sourceUris=source_uris,
        destinationTable=table_reference,
        schema=effective_schema,
        writeDisposition=write_disposition,
        createDisposition=create_disposition,
        sourceFormat='NEWLINE_DELIMITED_JSON',
        autodetect=use_autodetect,
        **extra_load_kwargs)
    load_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(load=load_config),
        jobReference=job_ref,
    )

    insert_response = self.client.jobs.Insert(
        bigquery.BigqueryJobsInsertRequest(projectId=project_id, job=load_job))
    return insert_response.jobReference
def _insert_load_job(self,
                     project_id,
                     job_id,
                     table_reference,
                     source_uris,
                     schema=None,
                     write_disposition=None,
                     create_disposition=None):
    """Submit a newline-delimited-JSON load job and return its job reference.

    Args:
        project_id: Project used for both the insert request and the job
            reference.
        job_id: Id assigned to the job reference.
        table_reference: Passed as the load configuration's destinationTable.
        source_uris: Passed as the load configuration's sourceUris.
        schema: Forwarded as the load schema. When it is None, ``autodetect``
            is set to True.
        write_disposition: Forwarded unchanged as writeDisposition.
        create_disposition: Forwarded unchanged as createDisposition.

    Returns:
        The ``jobReference`` attribute of the response returned by
        ``self.client.jobs.Insert``.
    """
    job_ref = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # Autodetection is enabled exactly when no schema was supplied.
    no_schema_given = schema is None
    load_config = bigquery.JobConfigurationLoad(
        sourceUris=source_uris,
        destinationTable=table_reference,
        schema=schema,
        writeDisposition=write_disposition,
        createDisposition=create_disposition,
        sourceFormat='NEWLINE_DELIMITED_JSON',
        autodetect=no_schema_given,
    )
    load_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(load=load_config),
        jobReference=job_ref,
    )

    insert_response = self.client.jobs.Insert(
        bigquery.BigqueryJobsInsertRequest(projectId=project_id, job=load_job))
    return insert_response.jobReference
def perform_extract_job(self,
                        destination,
                        job_id,
                        table_reference,
                        destination_format,
                        include_header=True,
                        compression=ExportCompression.NONE):
    """Start a job that exports a BigQuery table.

    The job is inserted under ``table_reference.projectId``, which is used for
    both the insert request and the job reference.

    Args:
        destination: Passed as the extract configuration's destinationUris.
        job_id: Id assigned to the job reference.
        table_reference: Table to export; passed as sourceTable.
        destination_format: Passed as destinationFormat.
        include_header: Passed as printHeader.
        compression: Passed as compression.

    Returns:
        bigquery.JobReference taken from the insert response, describing the
        job that was started.
    """
    export_project = table_reference.projectId
    job_ref = bigquery.JobReference(jobId=job_id, projectId=export_project)

    extract_config = bigquery.JobConfigurationExtract(
        destinationUris=destination,
        sourceTable=table_reference,
        printHeader=include_header,
        destinationFormat=destination_format,
        compression=compression,
    )
    extract_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(extract=extract_config),
        jobReference=job_ref,
    )

    insert_response = self.client.jobs.Insert(
        bigquery.BigqueryJobsInsertRequest(
            projectId=table_reference.projectId, job=extract_job))
    return insert_response.jobReference
def _start_query_job(self,
                     project_id,
                     query,
                     use_legacy_sql,
                     flatten_results,
                     job_id,
                     dry_run=False):
    """Submit a query job and return the id of the inserted job.

    Args:
        project_id: Project used for the insert request, the job reference and
            the ``self._get_temp_table`` call.
        query: Query text, passed through as the query configuration's query.
        use_legacy_sql: Passed as useLegacySql.
        flatten_results: Passed as flattenResults.
        job_id: Id assigned to the job reference.
        dry_run: Passed as the job configuration's dryRun flag.

    Returns:
        ``jobReference.jobId`` from the response returned by
        ``self.client.jobs.Insert``.
    """
    job_ref = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # Results are always directed to the table from _get_temp_table, with
    # allowLargeResults switched on.
    query_config = bigquery.JobConfigurationQuery(
        query=query,
        useLegacySql=use_legacy_sql,
        allowLargeResults=True,
        destinationTable=self._get_temp_table(project_id),
        flattenResults=flatten_results)
    query_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(
            dryRun=dry_run, query=query_config),
        jobReference=job_ref)

    insert_response = self.client.jobs.Insert(
        bigquery.BigqueryJobsInsertRequest(projectId=project_id, job=query_job))
    return insert_response.jobReference.jobId
def _insert_load_job(self, project_id, job_id, table_reference, source_uris,
                     schema=None):
    """Submit a load job and return the id of the inserted job.

    Args:
        project_id: Project used for both the insert request and the job
            reference.
        job_id: Id assigned to the job reference.
        table_reference: Passed as the load configuration's destinationTable.
        source_uris: Passed as the load configuration's sourceUris.
        schema: Optional schema forwarded to the load configuration; None
            leaves the field unset.

    Returns:
        ``jobReference.jobId`` from the response returned by
        ``self.client.jobs.Insert``.
    """
    reference = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # NOTE(review): no sourceFormat is set here, so whatever the service
    # treats as its default applies -- confirm that is intended.
    load_config = bigquery.JobConfigurationLoad(
        # The message fields are camelCase (sourceUris, destinationTable);
        # snake_case keyword names do not match any field on the message.
        sourceUris=source_uris,
        destinationTable=table_reference,
        # Forward the caller's schema instead of silently ignoring it.
        schema=schema,
    )
    request = bigquery.BigqueryJobsInsertRequest(
        # Insert under the same project the job reference names, so the
        # request and the reference cannot disagree.
        projectId=project_id,
        job=bigquery.Job(
            configuration=bigquery.JobConfiguration(load=load_config),
            jobReference=reference,
        ),
    )
    response = self.client.jobs.Insert(request)
    return response.jobReference.jobId
def load_table(self, job_id, project_id, table_ref, table_schema, gcs_urls,
               create_disposition, write_disposition):
    """Submit a newline-delimited-JSON load job and return its job id.

    Args:
        job_id: Id assigned to the job reference.
        project_id: Project used for both the insert request and the job
            reference.
        table_ref: Passed as the load configuration's destinationTable.
        table_schema: Passed as the load configuration's schema.
        gcs_urls: Passed as the load configuration's sourceUris.
        create_disposition: Forwarded unchanged as createDisposition.
        write_disposition: Forwarded unchanged as writeDisposition.

    Returns:
        ``jobReference.jobId`` from the response returned by
        ``self.client.jobs.Insert``.
    """
    reference = bq.JobReference(jobId=job_id, projectId=project_id)

    load_config = bq.JobConfigurationLoad(
        createDisposition=create_disposition,
        destinationTable=table_ref,
        schema=table_schema,
        sourceFormat="NEWLINE_DELIMITED_JSON",
        sourceUris=gcs_urls,
        writeDisposition=write_disposition)
    load_job = bq.Job(
        configuration=bq.JobConfiguration(load=load_config),
        jobReference=reference)

    insert_request = bq.BigqueryJobsInsertRequest(
        projectId=project_id, job=load_job)
    insert_response = self.client.jobs.Insert(insert_request)
    return insert_response.jobReference.jobId
def get_query_location(self, project_id, query, use_legacy_sql):
    """Return the location of the first table referenced by a query.

    A dry-run query job with a freshly generated job id is inserted, and the
    first entry of the ``referencedTables`` statistic in the response is
    looked up through ``self.get_table_location``. Only that first table is
    inspected; queries referencing tables in several locations are left for
    the BigQuery service to reject.

    Args:
        project_id: Project used for the insert request and the job reference.
        query: Query text to analyse.
        use_legacy_sql: Passed as the query configuration's useLegacySql.

    Returns:
        The value returned by ``self.get_table_location`` for the first
        referenced table, or None when the response carries no statistics or
        the query references no tables.
    """
    job_ref = bigquery.JobReference(
        jobId=uuid.uuid4().hex, projectId=project_id)
    dry_run_config = bigquery.JobConfiguration(
        dryRun=True,
        query=bigquery.JobConfigurationQuery(
            query=query,
            useLegacySql=use_legacy_sql,
        ))
    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id,
        job=bigquery.Job(configuration=dry_run_config, jobReference=job_ref))
    response = self.client.jobs.Insert(insert_request)

    stats = response.statistics
    if stats is None:
        # Missing statistics are only expected in tests.
        logging.warning(
            "Unable to get location, missing response.statistics. Query: %s",
            query)
        return None

    tables = stats.query.referencedTables
    if not tables:
        # Covers both None and an empty list.
        logging.debug("Query %s does not reference any tables.", query)
        return None

    first_table = tables[0]
    location = self.get_table_location(
        first_table.projectId, first_table.datasetId, first_table.tableId)
    logging.info(
        "Using location %r from table %r referenced by query %s",
        location,
        first_table,
        query)
    return location
def _start_query_job(self,
                     project_id,
                     query,
                     use_legacy_sql,
                     flatten_results,
                     job_id,
                     dry_run=False,
                     kms_key=None):
    """Submit a query job and return the full insert response.

    Args:
        project_id: Project used for the insert request, the job reference
            and, for non-dry runs, the ``self._get_temp_table`` call.
        query: Query text, passed through as the query configuration's query.
        use_legacy_sql: Passed as useLegacySql.
        flatten_results: Passed as flattenResults.
        job_id: Id assigned to the job reference.
        dry_run: Passed as dryRun. When true, no destination table is set and
            allowLargeResults is False.
        kms_key: Passed as kmsKeyName of the EncryptionConfiguration, which is
            attached to the query configuration even when this is None.

    Returns:
        The response object returned by ``self.client.jobs.Insert``.
    """
    job_ref = bigquery.JobReference(jobId=job_id, projectId=project_id)

    # A destination table is only obtained for real (non-dry-run) executions.
    if dry_run:
        destination_table = None
    else:
        destination_table = self._get_temp_table(project_id)

    encryption_config = bigquery.EncryptionConfiguration(kmsKeyName=kms_key)
    query_config = bigquery.JobConfigurationQuery(
        query=query,
        useLegacySql=use_legacy_sql,
        allowLargeResults=not dry_run,
        destinationTable=destination_table,
        flattenResults=flatten_results,
        destinationEncryptionConfiguration=encryption_config)
    query_job = bigquery.Job(
        configuration=bigquery.JobConfiguration(
            dryRun=dry_run, query=query_config),
        jobReference=job_ref)

    insert_request = bigquery.BigqueryJobsInsertRequest(
        projectId=project_id, job=query_job)
    return self.client.jobs.Insert(insert_request)