def test_matches_template(self):
    """Generated job names must match the beam_bq_job_* naming template."""
    expected_pattern = "beam_bq_job_[A-Z]+_[a-z0-9-]+_[a-z0-9-]+(_[a-z0-9-]+)?"
    # Name generated with the optional trailing random component.
    with_random = generate_bq_job_name(
        "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome")
    self.assertRegex(with_random, expected_pattern)
    # Name generated without the optional random component.
    without_random = generate_bq_job_name(
        "beamapp-job-test", "abcd", BigQueryJobTypes.COPY)
    self.assertRegex(without_random, expected_pattern)
def test_simple_names(self):
    """Each job type yields the expected name when no random suffix is given."""
    # (job_type, expected generated name) pairs, checked in order.
    cases = [
        (BigQueryJobTypes.EXPORT, "beam_bq_job_EXPORT_beamappjobtest_abcd"),
        (BigQueryJobTypes.LOAD, "beam_bq_job_LOAD_beamappjobtest_abcd"),
        (BigQueryJobTypes.QUERY, "beam_bq_job_QUERY_beamappjobtest_abcd"),
        (BigQueryJobTypes.COPY, "beam_bq_job_COPY_beamappjobtest_abcd"),
    ]
    for job_type, expected in cases:
        self.assertEqual(
            expected,
            generate_bq_job_name("beamapp-job-test", "abcd", job_type))
def _export_files(
        self,
        bq: bigquery_tools.BigQueryWrapper,
        element: 'ReadFromBigQueryRequest',
        table_reference: TableReference):
    """Runs a BigQuery export job for one read request and waits for it.

    Extracts ``table_reference`` to files under a GCS destination derived from
    ``self.gcs_location`` (or the pipeline temp_location), using JSON or AVRO
    format depending on ``self.use_json_exports``.

    Returns:
        bigquery.TableSchema instance, a list of FileMetadata instances
    """
    # Labels attached to the BigQuery extract job for attribution/billing.
    job_labels = self._get_bq_metadata().add_additional_bq_job_labels(
        self.bigquery_job_labels)
    export_job_name = bigquery_tools.generate_bq_job_name(
        self._job_name,
        self._source_uuid,
        bigquery_tools.BigQueryJobTypes.EXPORT,
        element.obj_id)
    temp_location = self.options.view_as(GoogleCloudOptions).temp_location
    # The uuid+obj_id suffix keeps destinations for concurrent exports distinct.
    gcs_location = bigquery_export_destination_uri(
        self.gcs_location,
        temp_location,
        '%s%s' % (self._source_uuid, element.obj_id))
    if self.use_json_exports:
        # JSON exports never include a header row.
        job_ref = bq.perform_extract_job([gcs_location],
                                         export_job_name,
                                         table_reference,
                                         bigquery_tools.FileFormat.JSON,
                                         project=self._get_project(),
                                         job_labels=job_labels,
                                         include_header=False)
    else:
        # AVRO exports keep logical types (e.g. timestamps) instead of
        # degrading them to their raw representations.
        job_ref = bq.perform_extract_job([gcs_location],
                                         export_job_name,
                                         table_reference,
                                         bigquery_tools.FileFormat.AVRO,
                                         project=self._get_project(),
                                         include_header=False,
                                         job_labels=job_labels,
                                         use_avro_logical_types=True)
    # Block until the extract job completes before listing its output files.
    bq.wait_for_bq_job(job_ref)
    metadata_list = FileSystems.match([gcs_location])[0].metadata_list

    if isinstance(table_reference, ValueProvider):
        # NOTE(review): when the reference is a ValueProvider we re-parse
        # element.table instead of resolving the provider — presumably the
        # provider is not yet accessible here; confirm against callers.
        table_ref = bigquery_tools.parse_table_reference(
            element.table, project=self._get_project())
    else:
        table_ref = table_reference

    table = bq.get_table(
        table_ref.projectId, table_ref.datasetId, table_ref.tableId)

    return table.schema, metadata_list
def _execute_query(cls, project, query):
    """Materializes ``query`` into a temporary table and returns that table.

    Starts a BigQuery query job under a generated job name, waits for it to
    finish (no retries), and returns the client's temp destination table.
    """
    # Job name embeds a timestamp plus a random int to avoid collisions
    # between concurrent invocations.
    query_job_name = bigquery_tools.generate_bq_job_name(
        'materializing_table_before_reading',
        str(uuid.uuid4())[0:10],
        bigquery_tools.BigQueryJobTypes.QUERY,
        '%s_%s' % (int(time.time()), random.randint(0, 1000)))
    # NOTE(review): this passes cls.project / cls.query rather than the
    # `project` / `query` parameters used everywhere else in this method —
    # confirm whether the class attributes are intentionally authoritative
    # here or this should use the parameters.
    cls._setup_temporary_dataset(cls.project, cls.query)
    job = cls.bigquery_client._start_query_job(
        project,
        query,
        use_legacy_sql=False,
        flatten_results=False,
        job_id=query_job_name)
    job_ref = job.jobReference
    # max_retries=0: fail immediately rather than re-running the query job.
    cls.bigquery_client.wait_for_bq_job(job_ref, max_retries=0)
    return cls.bigquery_client._get_temp_table(project)
def _execute_query(
        self,
        bq: bigquery_tools.BigQueryWrapper,
        element: 'ReadFromBigQueryRequest'):
    """Runs ``element.query`` as a BigQuery job and returns the temp table.

    Waits for the query job with no retries, then returns the wrapper's
    temporary destination table for this project.
    """
    # Random suffix keeps concurrently generated job names distinct.
    job_name = bigquery_tools.generate_bq_job_name(
        self._job_name,
        self._source_uuid,
        bigquery_tools.BigQueryJobTypes.QUERY,
        random.randint(0, 1000))
    labels = self._get_bq_metadata().add_additional_bq_job_labels(
        self.bigquery_job_labels)
    # _start_query_job takes use_legacy_sql, hence the negation.
    query_job = bq._start_query_job(
        self._get_project(),
        element.query,
        not element.use_standard_sql,
        element.flatten_results,
        job_id=job_name,
        kms_key=self.kms_key,
        job_labels=labels)
    # Fail fast: do not retry the query job.
    bq.wait_for_bq_job(query_job.jobReference, max_retries=0)
    return bq._get_temp_table(self._get_project())
def _generate_job_name(job_name, job_type, step_name):
    """Builds a BigQuery job name from the pipeline job, type, and step.

    Appends a random integer suffix so repeated invocations for the same
    step produce distinct job names.
    """
    suffix = random.randint(0, 1000)
    return bigquery_tools.generate_bq_job_name(
        job_name=job_name,
        step_id=step_name,
        job_type=job_type,
        random=suffix)
def test_random_in_name(self):
    """The optional random component is appended to the generated name."""
    generated = generate_bq_job_name(
        "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome")
    self.assertEqual(
        "beam_bq_job_COPY_beamappjobtest_abcd_randome", generated)