  def test_matches_template(self):
    base_pattern = "beam_bq_job_[A-Z]+_[a-z0-9-]+_[a-z0-9-]+(_[a-z0-9-]+)?"
    job_name = generate_bq_job_name(
        "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome")
    self.assertRegex(job_name, base_pattern)

    job_name = generate_bq_job_name(
        "beamapp-job-test", "abcd", BigQueryJobTypes.COPY)
    self.assertRegex(job_name, base_pattern)
Example #2
  def test_simple_names(self):
    self.assertEqual(
        "beam_bq_job_EXPORT_beamappjobtest_abcd",
        generate_bq_job_name(
            "beamapp-job-test", "abcd", BigQueryJobTypes.EXPORT))

    self.assertEqual(
        "beam_bq_job_LOAD_beamappjobtest_abcd",
        generate_bq_job_name("beamapp-job-test", "abcd", BigQueryJobTypes.LOAD))

    self.assertEqual(
        "beam_bq_job_QUERY_beamappjobtest_abcd",
        generate_bq_job_name(
            "beamapp-job-test", "abcd", BigQueryJobTypes.QUERY))

    self.assertEqual(
        "beam_bq_job_COPY_beamappjobtest_abcd",
        generate_bq_job_name("beamapp-job-test", "abcd", BigQueryJobTypes.COPY))
Example #3
  def _export_files(
      self,
      bq: bigquery_tools.BigQueryWrapper,
      element: 'ReadFromBigQueryRequest',
      table_reference: TableReference):
    """Runs a BigQuery export job.

    Returns:
      bigquery.TableSchema instance, a list of FileMetadata instances
    """
    job_labels = self._get_bq_metadata().add_additional_bq_job_labels(
        self.bigquery_job_labels)
    export_job_name = bigquery_tools.generate_bq_job_name(
        self._job_name,
        self._source_uuid,
        bigquery_tools.BigQueryJobTypes.EXPORT,
        element.obj_id)
    temp_location = self.options.view_as(GoogleCloudOptions).temp_location
    gcs_location = bigquery_export_destination_uri(
        self.gcs_location,
        temp_location,
        '%s%s' % (self._source_uuid, element.obj_id))
    if self.use_json_exports:
      job_ref = bq.perform_extract_job([gcs_location],
                                       export_job_name,
                                       table_reference,
                                       bigquery_tools.FileFormat.JSON,
                                       project=self._get_project(),
                                       job_labels=job_labels,
                                       include_header=False)
    else:
      job_ref = bq.perform_extract_job([gcs_location],
                                       export_job_name,
                                       table_reference,
                                       bigquery_tools.FileFormat.AVRO,
                                       project=self._get_project(),
                                       include_header=False,
                                       job_labels=job_labels,
                                       use_avro_logical_types=True)
    bq.wait_for_bq_job(job_ref)
    metadata_list = FileSystems.match([gcs_location])[0].metadata_list

    if isinstance(table_reference, ValueProvider):
      table_ref = bigquery_tools.parse_table_reference(
          element.table, project=self._get_project())
    else:
      table_ref = table_reference
    table = bq.get_table(
        table_ref.projectId, table_ref.datasetId, table_ref.tableId)

    return table.schema, metadata_list
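For orientation, here is a minimal standalone sketch of the naming call made above, with hypothetical stand-ins for self._job_name, self._source_uuid and element.obj_id (only the literal inputs are made up; the function and job type constant are the ones used in the snippet):

from apache_beam.io.gcp import bigquery_tools

# Hypothetical inputs standing in for the pipeline job name, the source uuid
# and the element id used by _export_files above.
export_job_name = bigquery_tools.generate_bq_job_name(
    'beamapp-job-test', 'abcd', bigquery_tools.BigQueryJobTypes.EXPORT, 'obj0')
# Expected form, per the tests above: beam_bq_job_EXPORT_beamappjobtest_abcd_obj0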
Example #4
  @classmethod
  def _execute_query(cls, project, query):
    query_job_name = bigquery_tools.generate_bq_job_name(
        'materializing_table_before_reading',
        str(uuid.uuid4())[0:10],
        bigquery_tools.BigQueryJobTypes.QUERY,
        '%s_%s' % (int(time.time()), random.randint(0, 1000)))
    cls._setup_temporary_dataset(cls.project, cls.query)
    job = cls.bigquery_client._start_query_job(
        project,
        query,
        use_legacy_sql=False,
        flatten_results=False,
        job_id=query_job_name)
    job_ref = job.jobReference
    cls.bigquery_client.wait_for_bq_job(job_ref, max_retries=0)
    return cls.bigquery_client._get_temp_table(project)
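The step id and random component above are chosen for uniqueness rather than readability: a truncated uuid4 plus a time/random suffix keeps repeated materializing-query runs from colliding on BigQuery job ids. A standalone illustration of those same expressions (printed values are examples only):

import random
import time
import uuid

# The same expressions used in the snippet above, shown in isolation.
step_id = str(uuid.uuid4())[0:10]    # e.g. '3f2b9c1a-7'
suffix = '%s_%s' % (int(time.time()), random.randint(0, 1000))  # e.g. '1700000000_417'
print(step_id, suffix)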
Example #5
  def _execute_query(
      self,
      bq: bigquery_tools.BigQueryWrapper,
      element: 'ReadFromBigQueryRequest'):
    query_job_name = bigquery_tools.generate_bq_job_name(
        self._job_name,
        self._source_uuid,
        bigquery_tools.BigQueryJobTypes.QUERY,
        random.randint(0, 1000))
    job = bq._start_query_job(
        self._get_project(),
        element.query,
        not element.use_standard_sql,
        element.flatten_results,
        job_id=query_job_name,
        kms_key=self.kms_key,
        job_labels=self._get_bq_metadata().add_additional_bq_job_labels(
            self.bigquery_job_labels))
    job_ref = job.jobReference
    bq.wait_for_bq_job(job_ref, max_retries=0)
    return bq._get_temp_table(self._get_project())
Example #6
def _generate_job_name(job_name, job_type, step_name):
    return bigquery_tools.generate_bq_job_name(job_name=job_name,
                                               step_id=step_name,
                                               job_type=job_type,
                                               random=random.randint(0, 1000))
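A usage sketch for the wrapper above; the step id is hypothetical, and the helper itself assumes random and Apache Beam's bigquery_tools module are already imported:

from apache_beam.io.gcp import bigquery_tools

# Hypothetical call: 'step1' is a made-up step id. Per the tests, the result
# takes the form beam_bq_job_LOAD_beamappjobtest_step1_<random 0..1000>.
name = _generate_job_name(
    'beamapp-job-test', bigquery_tools.BigQueryJobTypes.LOAD, 'step1')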
Example #7
  def test_random_in_name(self):
    self.assertEqual(
        "beam_bq_job_COPY_beamappjobtest_abcd_randome",
        generate_bq_job_name(
            "beamapp-job-test", "abcd", BigQueryJobTypes.COPY, "randome"))