Example #1
    def execute(self, context: 'Context'):
        self.log.info(
            'Executing extract of %s into: %s',
            self.source_project_dataset_table,
            self.destination_cloud_storage_uris,
        )
        hook = BigQueryHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            location=self.location,
            impersonation_chain=self.impersonation_chain,
        )

        table_ref = TableReference.from_string(self.source_project_dataset_table, hook.project_id)

        configuration: Dict[str, Any] = {
            'extract': {
                'sourceTable': table_ref.to_api_repr(),
                'compression': self.compression,
                'destinationUris': self.destination_cloud_storage_uris,
                'destinationFormat': self.export_format,
            }
        }

        if self.labels:
            configuration['labels'] = self.labels

        if self.export_format == 'CSV':
            # Only set fieldDelimiter and printHeader fields if using CSV.
            # Setting them for other export formats can cause the BigQuery API
            # to reject the job.
            configuration['extract']['fieldDelimiter'] = self.field_delimiter
            configuration['extract']['printHeader'] = self.print_header

        hook.insert_job(configuration=configuration)
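
For reference, the configuration dict built above maps directly onto BigQuery's extract job API. A minimal standalone sketch of the same export using the google-cloud-bigquery client (outside Airflow) might look like the following; the project, dataset, table, and bucket names are placeholders:

    from google.cloud import bigquery
    from google.cloud.bigquery import TableReference

    client = bigquery.Client()
    # Placeholder source table; in the operator, hook.project_id supplies the default project.
    table_ref = TableReference.from_string("my-project.my_dataset.my_table")

    job_config = bigquery.ExtractJobConfig(
        compression="NONE",
        destination_format="CSV",
        field_delimiter=",",  # only meaningful for CSV exports
        print_header=True,    # only meaningful for CSV exports
    )
    extract_job = client.extract_table(
        table_ref,
        "gs://my-bucket/exports/my_table-*.csv",  # placeholder destination URI
        job_config=job_config,
    )
    extract_job.result()  # block until the extract job finishes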
Example #2
    def execute(self, context: 'Context'):
        self.log.info(
            'Executing extract of %s into: %s',
            self.source_project_dataset_table,
            self.destination_cloud_storage_uris,
        )
        hook = BigQueryHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            location=self.location,
            impersonation_chain=self.impersonation_chain,
        )
        self.hook = hook

        configuration = self._prepare_configuration()
        job_id = hook.generate_job_id(
            job_id=self.job_id,
            dag_id=self.dag_id,
            task_id=self.task_id,
            logical_date=context["logical_date"],
            configuration=configuration,
            force_rerun=self.force_rerun,
        )

        try:
            self.log.info("Executing: %s", configuration)
            job: ExtractJob = hook.insert_job(
                job_id=job_id,
                configuration=configuration,
                project_id=self.project_id,
                location=self.location,
                timeout=self.result_timeout,
                retry=self.result_retry,
            )
            self._handle_job_error(job)
        except Conflict:
            # If the job already exists, retrieve it
            job = hook.get_job(
                project_id=self.project_id,
                location=self.location,
                job_id=job_id,
            )
            if job.state in self.reattach_states:
                # We are reattaching to a job
                job.result(timeout=self.result_timeout, retry=self.result_retry)
                self._handle_job_error(job)
            else:
                # Same job configuration so we need force_rerun
                raise AirflowException(
                    f"Job with id: {job_id} already exists and is in {job.state} state. If you "
                    f"want to force rerun it consider setting `force_rerun=True`."
                    f"Or, if you want to reattach in this scenario add {job.state} to `reattach_states`"
                )

        conf = job.to_api_repr()["configuration"]["extract"]["sourceTable"]
        dataset_id, project_id, table_id = conf["datasetId"], conf["projectId"], conf["tableId"]
        BigQueryTableLink.persist(
            context=context,
            task_instance=self,
            dataset_id=dataset_id,
            project_id=project_id,
            table_id=table_id,
        )
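
The helpers _prepare_configuration and _handle_job_error are referenced in Example #2 but not shown in the excerpt; _prepare_configuration presumably builds the same extract configuration dict as Example #1. A minimal sketch of _handle_job_error, assuming it only needs to surface a failed job, could be:

    from airflow.exceptions import AirflowException
    from google.cloud.bigquery.job import ExtractJob

    def _handle_job_error(job: ExtractJob) -> None:
        # Raise if BigQuery reported a terminal error for the job; this is a sketch
        # of the helper referenced above, not necessarily the operator's actual code.
        if job.error_result:
            raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}")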