def poke(self, context):
    # Returns True once the dataset exists; a 404 means "not yet", anything
    # else is treated as a real error.
    hook = BigQueryHook(bigquery_conn_id=self.gcp_conn_id)
    service = hook.get_service()
    try:
        service.datasets().get(datasetId=self.dataset_id,
                               projectId=self.project_id).execute()
        return True
    except HttpError as e:
        if e.resp["status"] == "404":
            return False
        raise AirflowException(f"Error: {e}")
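# --- Illustrative sketch (not from the original source) ---------------------
# A minimal example of how the poke() above might be wrapped in a sensor and
# wired into a DAG. The class name, constructor arguments, and DAG values are
# assumptions made for this sketch; only the poke() logic itself comes from
# the snippet above.
from datetime import datetime

from airflow import DAG
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.exceptions import AirflowException
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from googleapiclient.errors import HttpError


class BigQueryDatasetExistenceSensor(BaseSensorOperator):
    """Hypothetical sensor that waits until a BigQuery dataset exists."""

    def __init__(self, project_id, dataset_id,
                 gcp_conn_id='google_cloud_default', *args, **kwargs):
        super(BigQueryDatasetExistenceSensor, self).__init__(*args, **kwargs)
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.gcp_conn_id = gcp_conn_id

    def poke(self, context):
        # Same check as the poke() above: 404 -> keep waiting, else fail.
        hook = BigQueryHook(bigquery_conn_id=self.gcp_conn_id)
        service = hook.get_service()
        try:
            service.datasets().get(datasetId=self.dataset_id,
                                   projectId=self.project_id).execute()
            return True
        except HttpError as e:
            if e.resp["status"] == "404":
                return False
            raise AirflowException("Error: {}".format(e))


dag = DAG('wait_for_bq_dataset', start_date=datetime(2024, 1, 1),
          schedule_interval=None)

wait_for_dataset = BigQueryDatasetExistenceSensor(
    task_id='wait_for_dataset',
    project_id='my-project',      # placeholder
    dataset_id='my_dataset',      # placeholder
    poke_interval=60,
    timeout=60 * 60,
    dag=dag)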
def execute(self, context):
    result_export_success = True
    dataset_creation_success = False
    query_execution_success = False
    err_msg = ""
    try:
        hook = BigQueryHook(use_legacy_sql=False,
                            bigquery_conn_id=self.bigquery_conn_id,
                            delegate_to=self.delegate_to,
                            location=self.location)
        service = hook.get_service()
        cursor = BigQueryBaseCursor(project_id=self.project_id, service=service)

        # Create a temporary dataset whose tables expire automatically.
        cursor.create_empty_dataset(
            dataset_id=self.dataset_id,
            project_id=self.project_id,
            dataset_reference={
                'defaultTableExpirationMs': self.default_table_expiry_in_ms
            })
        dataset_creation_success = True

        # Materialize the query result into the temporary table.
        cursor.run_query(destination_dataset_table=self.temp_table_name,
                         write_disposition='WRITE_TRUNCATE',
                         allow_large_results=True,
                         sql=self.query,
                         use_legacy_sql=False)
        query_execution_success = True

        # Export the temporary table to Cloud Storage.
        cursor.run_extract(
            source_project_dataset_table=self.temp_table_name,
            destination_cloud_storage_uris=self.destination_cloud_storage_uris,
            compression=self.compression,
            export_format=self.export_format,
            field_delimiter=self.field_delimiter,
            print_header=self.print_header)
    except Exception as e:
        err_msg = e
        logging.error(e)
        result_export_success = False
    finally:
        # Clean up whatever was created, even if the export failed.
        if query_execution_success:
            cursor.run_table_delete(
                deletion_dataset_table=self.temp_table_name)
        if dataset_creation_success:
            cursor.delete_dataset(dataset_id=self.dataset_id,
                                  project_id=self.project_id)

    if not result_export_success:
        raise AirflowException(
            "Query export failed. Error: {}".format(err_msg))
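# --- Illustrative sketch (not from the original source) ---------------------
# The execute() above reads a number of instance attributes; a hypothetical
# operator constructor that supplies them could look like this. The class
# name and the default values are assumptions made for the sketch;
# execute(self, context) is assumed to be the method shown above.
from airflow.models import BaseOperator


class BigQueryQueryToGCSOperator(BaseOperator):
    """Hypothetical operator: run a query into a temp table, export to GCS."""

    def __init__(self,
                 project_id,
                 dataset_id,
                 temp_table_name,
                 query,
                 destination_cloud_storage_uris,
                 default_table_expiry_in_ms=3600000,   # assumed 1-hour expiry
                 compression='NONE',
                 export_format='CSV',
                 field_delimiter=',',
                 print_header=True,
                 bigquery_conn_id='bigquery_default',
                 delegate_to=None,
                 location=None,
                 *args, **kwargs):
        super(BigQueryQueryToGCSOperator, self).__init__(*args, **kwargs)
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.temp_table_name = temp_table_name  # e.g. '<dataset>.<table>'
        self.query = query
        self.destination_cloud_storage_uris = destination_cloud_storage_uris
        self.default_table_expiry_in_ms = default_table_expiry_in_ms
        self.compression = compression
        self.export_format = export_format
        self.field_delimiter = field_delimiter
        self.print_header = print_header
        self.bigquery_conn_id = bigquery_conn_id
        self.delegate_to = delegate_to
        self.location = location

    # execute(self, context) is assumed to be the method shown above.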
def execute(self, context):
    gcs_hook = GoogleCloudStorageHook(google_cloud_storage_conn_id=self.conn_id)
    partner_ids = models.Variable.get('partner_ids').split(',')
    for i, partner_id in enumerate(partner_ids):
        filename = download_and_transform_erf(self, partner_id)
        entity_read_file_ndj = 'gs://%s/%s' % (self.gcs_bucket, filename)
        hook = BigQueryHook(bigquery_conn_id=self.conn_id)
        self.service = hook.get_service()
        # Overwrite the table with the first partner, then append the rest.
        if i == 0:
            write_disposition = 'WRITE_TRUNCATE'
        else:
            write_disposition = 'WRITE_APPEND'
        bq_base_cursor = BigQueryBaseCursor(self.service, self.cloud_project_id)
        bq_base_cursor.run_load(
            self.bq_table,
            [entity_read_file_ndj],
            schema_fields=self.schema,
            source_format='NEWLINE_DELIMITED_JSON',
            write_disposition=write_disposition,
            ignore_unknown_values=True)
        # Remove the staging file once it has been loaded.
        gcs_hook.delete(self.gcs_bucket, filename)
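# --- Illustrative sketch (not from the original source) ---------------------
# download_and_transform_erf() is not shown in this snippet. The loop above
# only relies on it returning the name of a newline-delimited JSON object
# already uploaded to self.gcs_bucket. A minimal sketch under that assumption
# follows; the source bucket/object naming and the transformation details are
# guesses made for the example, not the author's implementation.
import json
import tempfile

from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook


def download_and_transform_erf(task, partner_id):
    """Hypothetical helper: fetch a partner's Entity Read File, convert it to
    newline-delimited JSON, upload it to task.gcs_bucket, and return the
    uploaded object name."""
    gcs_hook = GoogleCloudStorageHook(google_cloud_storage_conn_id=task.conn_id)

    # Assumed source location of the raw Entity Read File (a JSON array).
    source_bucket = 'gdbm-%s' % partner_id
    source_object = 'entity/entity.json'
    raw = gcs_hook.download(source_bucket, source_object)

    # Rewrite the JSON array as one JSON object per line (NDJSON).
    with tempfile.NamedTemporaryFile(mode='w', suffix='.ndjson',
                                     delete=False) as out:
        for record in json.loads(raw):
            out.write(json.dumps(record))
            out.write('\n')
        local_path = out.name

    destination_object = 'erf/%s.ndjson' % partner_id
    gcs_hook.upload(task.gcs_bucket, destination_object, local_path)
    return destination_object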
def execute(self, context):
    bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                           delegate_to=self.delegate_to)
    logging.info('start_date_str = %s', self.start_date_str)
    logging.info('end_date_str = %s', self.end_date_str)
    logging.info('Date conversion starts')
    start = str2date(self.start_date_str)
    end = str2date(self.end_date_str)
    logging.info('Date conversion ends')
    logging.info('time_partitioning = %s', self.time_partitioning)
    for i in daterange(start, end):
        date_no_dash = i.strftime("%Y%m%d")
        partitioned_table_id = self.table_id + date_no_dash
        logging.info('Partitioned table %s', partitioned_table_id)
        logging.info('Checking if table exists <%s:%s.%s>',
                     self.project_id, self.dataset_id, partitioned_table_id)
        table_exists = bq_hook.table_exists(self.project_id,
                                            self.dataset_id,
                                            partitioned_table_id)
        if not table_exists:
            logging.info('Table <%s> does not exist', partitioned_table_id)
            logging.info('Connecting to BigQuery')
            cursor = BigQueryHelperCursor(bq_hook.get_service(),
                                          self.project_id)
            logging.info('Creating the empty table %s with the schema %s',
                         partitioned_table_id, self.schema_fields)
            cursor.create_empty_table(
                project_id=self.project_id,
                dataset_id=self.dataset_id,
                table_id=partitioned_table_id,
                schema_fields=self.schema_fields,
                time_partitioning=self.time_partitioning)
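# --- Illustrative sketch (not from the original source) ---------------------
# str2date() and daterange() are not defined in this snippet. A minimal sketch,
# assuming dates arrive as 'YYYY-MM-DD' strings and the range is inclusive of
# the end date, could look like this:
from datetime import datetime, timedelta


def str2date(date_str):
    """Hypothetical parser: assumes 'YYYY-MM-DD' input."""
    return datetime.strptime(date_str, '%Y-%m-%d').date()


def daterange(start, end):
    """Hypothetical generator: yields each date from start to end, inclusive."""
    current = start
    while current <= end:
        yield current
        current += timedelta(days=1)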