def extract_on_complete(self, task_instance) -> [StepMetadata]:
    """Build step metadata for a completed BigQuery task.

    The BigQuery job id is resolved through ``_get_xcom_bigquery_job_id``;
    the job is then fetched with a ``bigquery.Client`` and its inputs and
    outputs are derived from it. Failures are not propagated to the caller:
    they are logged and recorded in the returned context instead, under

    * ``bigquery.extractor.client_error`` when the job id could not be
      resolved (lookup raised or returned ``None``), or
    * ``bigquery.extractor.error`` when fetching the job or deriving its
      datasets raised.

    :param task_instance: forwarded to ``_get_xcom_bigquery_job_id`` to
        resolve the BigQuery job id.
    :return: a one-element list holding the ``StepMetadata``. ``inputs``
        and ``outputs`` are ``None`` whenever they could not be determined.
    """
    log.debug(f"extract_on_complete({task_instance})")
    context = self.parse_sql_context()
    source = self._source()

    try:
        bigquery_job_id = self._get_xcom_bigquery_job_id(task_instance)
        context['bigquery.job_id'] = bigquery_job_id
        if bigquery_job_id is None:
            # Raised inside the ``try`` on purpose: the handler below needs
            # an active exception so that it can record a traceback.
            raise Exception(
                "Xcom could not resolve BigQuery job id. "
                "Job may have failed."
            )
    except Exception as e:
        log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                  exc_info=True)
        context['bigquery.extractor.client_error'] = \
            f"{e}: {traceback.format_exc()}"

        # Without a job id there is nothing to look up; report the step
        # with the error context only.
        return [StepMetadata(
            name=get_job_name(task=self.operator),
            context=context,
            inputs=None,
            outputs=None
        )]

    inputs = None
    outputs = None
    try:
        client = bigquery.Client()
        try:
            job = client.get_job(job_id=bigquery_job_id)
            # The raw job properties are stored in the context as JSON text.
            job_properties_str = json.dumps(job._properties)
            context['bigquery.job_properties'] = job_properties_str

            inputs = self._get_input_from_bq(job, context, source, client)
            outputs = self._get_output_from_bq(job, source, client)
        finally:
            # Ensure client has close() defined, otherwise ignore.
            # NOTE: close() was introduced in python-bigquery v1.23.0
            if hasattr(client, "close"):
                client.close()
    except Exception as e:
        # Best effort: whatever was gathered before the failure (possibly
        # only ``inputs``) is still returned below.
        log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                  exc_info=True)
        context['bigquery.extractor.error'] = \
            f"{e}: {traceback.format_exc()}"

    return [StepMetadata(
        name=get_job_name(task=self.operator),
        inputs=inputs,
        outputs=outputs,
        context=context
    )]
def extract_on_complete(self, task_instance) -> StepMetadata:
    """Return fixed step metadata describing one input and one output.

    The input dataset is built from a table schema with two ``text``
    columns (``field1`` and ``field2``); the output dataset is built from
    a table name only. ``task_instance`` is not used.

    :param task_instance: ignored.
    :return: a ``StepMetadata`` named after ``self.operator``.
    """
    input_dataset = Dataset.from_table_schema(
        self.source,
        DbTableSchema(
            schema_name='schema',
            table_name=DbTableName('extract_on_complete_input1'),
            # Ordinal positions are 1-based and follow the listing order.
            columns=[
                DbColumn(
                    name=column_name,
                    type='text',
                    description='',
                    ordinal_position=position
                )
                for position, column_name
                in enumerate(('field1', 'field2'), start=1)
            ]
        )
    )
    output_dataset = Dataset.from_table(
        self.source, "extract_on_complete_output1"
    )

    return StepMetadata(
        name=get_job_name(task=self.operator),
        inputs=[input_dataset],
        outputs=[output_dataset],
        context={"extract_on_complete": "extract_on_complete"}
    )
def extract(self) -> [StepMetadata]:
    """Return a single fixed step with one input and one output table.

    :return: a one-element list holding a ``StepMetadata`` named after
        ``self.operator``, reading ``extract_input1`` and writing
        ``extract_output1`` on ``self.source``.
    """
    source_datasets = [Dataset.from_table(self.source, "extract_input1")]
    target_datasets = [Dataset.from_table(self.source, "extract_output1")]

    step = StepMetadata(
        name=get_job_name(task=self.operator),
        inputs=source_datasets,
        outputs=target_datasets,
        context={"extract": "extract"}
    )
    return [step]
def extract_on_complete(self, task_instance) -> Optional[StepMetadata]:
    """Build step metadata for a completed BigQuery task.

    The BigQuery job id is resolved through ``_get_xcom_bigquery_job_id``;
    the job is then fetched with a ``bigquery.Client`` and its statistics,
    inputs and output are derived from the job properties. Failures are
    not propagated to the caller: they are logged and reported through a
    ``bigQuery_error`` run facet instead.

    :param task_instance: forwarded to ``_get_xcom_bigquery_job_id`` to
        resolve the BigQuery job id.
    :return: a ``StepMetadata``. When the job id cannot be resolved,
        ``inputs`` and ``outputs`` are ``None`` and only the error facet is
        set. Otherwise ``outputs`` is a list with the single output
        dataset, or an empty list when no output was obtained.
    """
    log.debug(f"extract_on_complete({task_instance})")
    context = self.parse_sql_context()

    try:
        bigquery_job_id = self._get_xcom_bigquery_job_id(task_instance)
        if bigquery_job_id is None:
            # Raised inside the ``try`` on purpose: the handler below needs
            # an active exception so that it can record a traceback.
            raise Exception(
                "Xcom could not resolve BigQuery job id. "
                "Job may have failed."
            )
    except Exception as e:
        log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                  exc_info=True)
        # Without a job id there is nothing to look up; report the step
        # with the error facet only.
        return StepMetadata(
            name=get_job_name(task=self.operator),
            inputs=None,
            outputs=None,
            run_facets={
                "bigQuery_error": BigQueryErrorRunFacet(
                    clientError=f"{e}: {traceback.format_exc()}",
                    parserError=context.parser_error
                )
            }
        )

    inputs = None
    output = None
    run_facets = {}
    try:
        client = bigquery.Client()
        try:
            job = client.get_job(job_id=bigquery_job_id)
            props = job._properties

            run_stat_facet, dataset_stat_facet = \
                self._get_output_statistics(props)

            run_facets.update({
                "bigQuery_statistics": run_stat_facet
            })
            inputs = self._get_input_from_bq(props, client)
            output = self._get_output_from_bq(props, client)
            if output:
                # Dataset-level statistics are attached to the output
                # dataset itself rather than to the run.
                output.custom_facets.update({
                    "stats": dataset_stat_facet
                })
        finally:
            # Ensure client has close() defined, otherwise ignore.
            # NOTE: close() was introduced in python-bigquery v1.23.0
            if hasattr(client, "close"):
                client.close()
    except Exception as e:
        # Best effort: whatever was gathered before the failure is still
        # returned below, alongside the error facet.
        log.error(f"Cannot retrieve job details from BigQuery.Client. {e}",
                  exc_info=True)
        run_facets.update({
            "bigQuery_error": BigQueryErrorRunFacet(
                clientError=f"{e}: {traceback.format_exc()}",
                parserError=context.parser_error
            )
        })

    return StepMetadata(
        name=get_job_name(task=self.operator),
        inputs=inputs,
        outputs=[output] if output else [],
        run_facets=run_facets
    )