def execute(self, context: Dict) -> None:
    """Stop the Data Fusion pipeline ``self.pipeline_name``.

    Resolves the instance's API endpoint, then issues a stop request for
    the pipeline in ``self.namespace``.

    :param context: Airflow task execution context (unused directly).
    """
    # NOTE(review): unlike the sibling operators in this file, this hook is
    # built without impersonation_chain — confirm whether this operator's
    # __init__ accepts it and pass it through if so.
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
    )
    # BUG FIX: previous message said "Starting" although this operator stops
    # the pipeline.
    self.log.info("Stopping Data Fusion pipeline: %s", self.pipeline_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    # The pipeline API lives behind the instance-specific endpoint.
    api_url = instance["apiEndpoint"]
    hook.stop_pipeline(
        pipeline_name=self.pipeline_name,
        instance_url=api_url,
        namespace=self.namespace,
    )
    # BUG FIX: previous message said "Pipeline started".
    self.log.info("Pipeline stopped")
def execute(self, context: Dict):
    """Return the pipelines deployed in the Data Fusion instance.

    Looks up the instance to obtain its API endpoint, then lists pipelines
    filtered by the configured artifact name/version.

    :param context: Airflow task execution context (unused directly).
    :return: the pipeline listing returned by the hook.
    """
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Listing Data Fusion pipelines")
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    # Pipeline operations go through the instance's own endpoint.
    service_url = instance["apiEndpoint"]
    pipelines = hook.list_pipelines(
        instance_url=service_url,
        namespace=self.namespace,
        artifact_version=self.artifact_version,
        artifact_name=self.artifact_name,
    )
    self.log.info("%s", pipelines)
    return pipelines
def execute(self, context: dict) -> None:
    """Delete one version of a Data Fusion pipeline.

    Resolves the instance endpoint, then removes ``self.pipeline_name`` at
    ``self.version_id`` from ``self.namespace``.

    :param context: Airflow task execution context (unused directly).
    """
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Deleting Data Fusion pipeline: %s", self.pipeline_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    # The delete call is addressed to the instance-specific endpoint.
    endpoint = instance["apiEndpoint"]
    hook.delete_pipeline(
        pipeline_name=self.pipeline_name,
        version_id=self.version_id,
        instance_url=endpoint,
        namespace=self.namespace,
    )
    self.log.info("Pipeline deleted")
def execute(self, context: 'Context') -> dict:
    """Fetch a Data Fusion instance and persist a console link for it.

    :param context: Airflow task execution context, used to persist the
        ``DataFusionInstanceLink``.
    :return: the instance resource dict returned by the hook.
    """
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Retrieving Data Fusion instance: %s", self.instance_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    # Prefer the explicitly configured project; otherwise derive it from
    # the instance resource (mirrors the original `or` fallback).
    if self.project_id:
        resolved_project = self.project_id
    else:
        resolved_project = DataFusionPipelineLinkHelper.get_project_id(instance)
    DataFusionInstanceLink.persist(
        context=context,
        task_instance=self,
        project_id=resolved_project,
    )
    return instance
def execute(self, context: 'Context') -> str:
    """Start a Data Fusion pipeline and optionally wait for a success state.

    Submits the pipeline run, persists a console link, and — unless the
    operator runs asynchronously — blocks until the run reaches one of
    ``self.success_states`` (or ``self.pipeline_timeout`` elapses).

    :param context: Airflow task execution context, used to persist the
        ``DataFusionPipelineLink``.
    :return: the id of the submitted pipeline run.
    """
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        api_version=self.api_version,
        impersonation_chain=self.impersonation_chain,
    )
    self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    endpoint = instance["apiEndpoint"]
    run_id = hook.start_pipeline(
        pipeline_name=self.pipeline_name,
        instance_url=endpoint,
        namespace=self.namespace,
        runtime_args=self.runtime_args,
    )
    self.log.info("Pipeline %s submitted successfully.", run_id)
    DataFusionPipelineLink.persist(
        context=context,
        task_instance=self,
        uri=instance["serviceEndpoint"],
    )
    # Fire-and-forget mode: hand back the run id without polling.
    if self.asynchronous:
        return run_id
    self.log.info(
        "Waiting when pipeline %s will be in one of the success states", run_id
    )
    hook.wait_for_pipeline_state(
        success_states=self.success_states,
        pipeline_id=run_id,
        pipeline_name=self.pipeline_name,
        namespace=self.namespace,
        instance_url=endpoint,
        timeout=self.pipeline_timeout,
    )
    self.log.info("Job %s discover success state.", run_id)
    return run_id
def poke(self, context: dict) -> bool:
    """Check whether the pipeline run has reached an expected status.

    :param context: Airflow task execution context (unused directly).
    :raises AirflowException: if the current status is one of
        ``self.failure_statuses``.
    :return: ``True`` when the run's status is in ``self.expected_statuses``.
    """
    self.log.info(
        "Waiting for pipeline %s to be in one of the states: %s.",
        self.pipeline_id,
        ", ".join(self.expected_statuses),
    )
    hook = DataFusionHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        impersonation_chain=self.impersonation_chain,
    )
    instance = hook.get_instance(
        instance_name=self.instance_name,
        location=self.location,
        project_id=self.project_id,
    )
    endpoint = instance["apiEndpoint"]
    current_status = None
    try:
        workflow = hook.get_pipeline_workflow(
            pipeline_name=self.pipeline_name,
            instance_url=endpoint,
            pipeline_id=self.pipeline_id,
            namespace=self.namespace,
        )
        current_status = workflow["status"]
    except AirflowException:
        # Because the pipeline may not be visible in system yet
        pass
    if self.failure_statuses and current_status in self.failure_statuses:
        raise AirflowException(
            f"Pipeline with id '{self.pipeline_id}' state is: {current_status}. "
            f"Terminating sensor..."
        )
    self.log.debug(
        "Current status of the pipeline workflow for %s: %s.",
        self.pipeline_id,
        current_status,
    )
    return current_status in self.expected_statuses