def poke(self, context):
    hook = BigQueryHook(bigquery_conn_id=self.gcp_conn_id)
    service = hook.get_service()

    try:
        # Succeeds only if the dataset exists; a 404 means it is not there yet.
        service.datasets().get(datasetId=self.dataset_id,
                               projectId=self.project_id).execute()
        return True
    except HttpError as e:
        if e.resp["status"] == "404":
            return False

        raise AirflowException(f"Error: {e}")
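
For context, this poke() method is a sensor callback. Below is a minimal sketch of the surrounding sensor class, assuming the Airflow 1.x contrib import layout that get_service() implies; the class name and constructor defaults are illustrative, not taken from the original source.

from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.exceptions import AirflowException
from airflow.sensors.base_sensor_operator import BaseSensorOperator
from googleapiclient.errors import HttpError


class BigQueryDatasetSensor(BaseSensorOperator):  # hypothetical name
    def __init__(self, project_id, dataset_id,
                 gcp_conn_id='google_cloud_default', *args, **kwargs):
        super(BigQueryDatasetSensor, self).__init__(*args, **kwargs)
        self.project_id = project_id
        self.dataset_id = dataset_id
        self.gcp_conn_id = gcp_conn_id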
def execute(self, context):
    result_export_success = True
    dataset_creation_success = False
    query_execution_success = False
    err_msg = ""
    try:
        hook = BigQueryHook(use_legacy_sql=False,
                            bigquery_conn_id=self.bigquery_conn_id,
                            delegate_to=self.delegate_to,
                            location=self.location)
        service = hook.get_service()
        cursor = BigQueryBaseCursor(project_id=self.project_id,
                                    service=service)
        # Temporary dataset to hold the intermediate query result.
        cursor.create_empty_dataset(dataset_id=self.dataset_id,
                                    project_id=self.project_id,
                                    dataset_reference={
                                        'defaultTableExpirationMs':
                                        self.default_table_expiry_in_ms
                                    })
        dataset_creation_success = True
        # Materialize the query into the temporary table.
        cursor.run_query(destination_dataset_table=self.temp_table_name,
                         write_disposition='WRITE_TRUNCATE',
                         allow_large_results=True,
                         sql=self.query,
                         use_legacy_sql=False)
        query_execution_success = True
        # Export the temporary table to Cloud Storage.
        cursor.run_extract(
            source_project_dataset_table=self.temp_table_name,
            destination_cloud_storage_uris=self.destination_cloud_storage_uris,
            compression=self.compression,
            export_format=self.export_format,
            field_delimiter=self.field_delimiter,
            print_header=self.print_header)
    except Exception as e:
        err_msg = str(e)
        logging.error(e)
        result_export_success = False
    finally:
        # Clean up whatever was created, then surface any failure.
        if query_execution_success:
            cursor.run_table_delete(
                deletion_dataset_table=self.temp_table_name)
        if dataset_creation_success:
            cursor.delete_dataset(dataset_id=self.dataset_id,
                                  project_id=self.project_id)
        if not result_export_success:
            raise AirflowException(
                "Query export failed. Error: {}".format(err_msg))
Example #3
  def execute(self, context):
    gcs_hook = GoogleCloudStorageHook(google_cloud_storage_conn_id=self.conn_id)
    partner_ids = models.Variable.get('partner_ids').split(',')
    for i, partner_id in enumerate(partner_ids):
      filename = download_and_transform_erf(self, partner_id)
      entity_read_file_ndj = 'gs://%s/%s' % (self.gcs_bucket, filename)
      hook = BigQueryHook(bigquery_conn_id=self.conn_id)
      self.service = hook.get_service()
      # Overwrite the destination table for the first partner, append for the rest.
      if i == 0:
        write_disposition = 'WRITE_TRUNCATE'
      else:
        write_disposition = 'WRITE_APPEND'

      bq_base_cursor = BigQueryBaseCursor(self.service, self.cloud_project_id)
      bq_base_cursor.run_load(
          self.bq_table,
          [entity_read_file_ndj],
          schema_fields=self.schema,
          source_format='NEWLINE_DELIMITED_JSON',
          write_disposition=write_disposition,
          ignore_unknown_values=True)
      gcs_hook.delete(self.gcs_bucket, filename)
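
Example #3 also depends on pieces that are not shown here: download_and_transform_erf is a project-specific helper, and self.conn_id, self.gcs_bucket, self.cloud_project_id, self.bq_table, and self.schema come from the operator's constructor. The imports below are an assumption based on the Airflow 1.x contrib names used above.

from airflow import models
from airflow.contrib.hooks.bigquery_hook import BigQueryBaseCursor, BigQueryHook
from airflow.contrib.hooks.gcs_hook import GoogleCloudStorageHook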
    def execute(self, context):
        bq_hook = BigQueryHook(bigquery_conn_id=self.bigquery_conn_id,
                               delegate_to=self.delegate_to)

        logging.info('start_date_str = %s', self.start_date_str)
        logging.info('end_date_str = %s', self.end_date_str)
        logging.info('Date conversion starts')
        start = str2date(self.start_date_str)
        end = str2date(self.end_date_str)
        logging.info('Date conversion ends')
        logging.info('time_partitioning = %s', self.time_partitioning)

        for i in daterange(start, end):
            date_no_dash = i.strftime("%Y%m%d")
            partitioned_table_id = self.table_id + date_no_dash
            logging.info("Partitioned table {0}".format(partitioned_table_id))

            logging.info('Checking if table <%s:%s.%s> exists',
                         self.project_id, self.dataset_id,
                         partitioned_table_id)
            table_exists = bq_hook.table_exists(self.project_id,
                                                self.dataset_id,
                                                partitioned_table_id)
            if not table_exists:
                logging.info('Table <%s> does not exist',
                             partitioned_table_id)
                logging.info('Connecting to BigQuery')
                cursor = BigQueryHelperCursor(bq_hook.get_service(),
                                              self.project_id)

                logging.info('Creating empty table %s with schema %s',
                             partitioned_table_id, self.schema_fields)
                cursor.create_empty_table(
                    project_id=self.project_id,
                    dataset_id=self.dataset_id,
                    table_id=partitioned_table_id,
                    schema_fields=self.schema_fields,
                    time_partitioning=self.time_partitioning)
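
This last snippet relies on two helpers, str2date and daterange, whose definitions are not shown. A plausible implementation is sketched below, assuming the dates arrive as 'YYYY-MM-DD' strings and the range is inclusive of the end date; the actual project helpers may differ.

import datetime


def str2date(date_str):
    # Assumed input format; the original helper is not shown.
    return datetime.datetime.strptime(date_str, '%Y-%m-%d').date()


def daterange(start, end):
    # Yields each date from start to end, inclusive.
    for n in range((end - start).days + 1):
        yield start + datetime.timedelta(days=n)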