Example #1
    def execute(self, context: Dict):
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
        )
        self.log.info("Stopping Data Fusion pipeline: %s", self.pipeline_name)
        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )
        api_url = instance["apiEndpoint"]
        hook.stop_pipeline(
            pipeline_name=self.pipeline_name,
            instance_url=api_url,
            namespace=self.namespace,
        )
        self.log.info("Pipeline stopped")
Example #2
    def execute(self, context: Dict):
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Listing Data Fusion pipelines")
        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )
        api_url = instance["apiEndpoint"]
        pipelines = hook.list_pipelines(
            instance_url=api_url,
            namespace=self.namespace,
            artifact_version=self.artifact_version,
            artifact_name=self.artifact_name,
        )
        self.log.info("%s", pipelines)
        return pipelines
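Example #2 does the same instance lookup and then lists the pipelines deployed in a namespace, optionally filtered by artifact. A standalone sketch with placeholder names; passing None for the artifact filters is an assumption about how to skip filtering:

from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook

hook = DataFusionHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
instance = hook.get_instance(
    instance_name="example-instance",  # placeholder
    location="europe-west1",           # placeholder
    project_id="example-project",      # placeholder
)
# artifact_name / artifact_version narrow the listing to pipelines built from a
# specific artifact; None is assumed to mean "no filter".
pipelines = hook.list_pipelines(
    instance_url=instance["apiEndpoint"],
    namespace="default",               # placeholder CDAP namespace
    artifact_name=None,
    artifact_version=None,
)
print(pipelines)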
Example #3
    def execute(self, context: dict) -> None:
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Deleting Data Fusion pipeline: %s", self.pipeline_name)
        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )
        api_url = instance["apiEndpoint"]
        hook.delete_pipeline(
            pipeline_name=self.pipeline_name,
            version_id=self.version_id,
            instance_url=api_url,
            namespace=self.namespace,
        )
        self.log.info("Pipeline deleted")
Example #4
    def execute(self, context: 'Context') -> dict:
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Retrieving Data Fusion instance: %s",
                      self.instance_name)
        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )

        project_id = self.project_id or DataFusionPipelineLinkHelper.get_project_id(
            instance)
        DataFusionInstanceLink.persist(context=context,
                                       task_instance=self,
                                       project_id=project_id)
        return instance
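Example #4 only retrieves the instance and persists an operator link for it; the returned value is a plain dict. A sketch showing the two endpoint fields the other examples rely on, with placeholder names:

from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook

hook = DataFusionHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
instance = hook.get_instance(
    instance_name="example-instance",  # placeholder
    location="europe-west1",           # placeholder
    project_id="example-project",      # placeholder
)
# apiEndpoint is the CDAP API URL used by the pipeline calls in Examples #1-#3 and #5;
# serviceEndpoint is the UI URL that Example #5 persists via DataFusionPipelineLink.
print(instance["apiEndpoint"])
print(instance["serviceEndpoint"])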
Example #5
    def execute(self, context: 'Context') -> str:
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            api_version=self.api_version,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Starting Data Fusion pipeline: %s", self.pipeline_name)
        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )
        api_url = instance["apiEndpoint"]
        pipeline_id = hook.start_pipeline(
            pipeline_name=self.pipeline_name,
            instance_url=api_url,
            namespace=self.namespace,
            runtime_args=self.runtime_args,
        )
        self.log.info("Pipeline %s submitted successfully.", pipeline_id)

        DataFusionPipelineLink.persist(context=context,
                                       task_instance=self,
                                       uri=instance["serviceEndpoint"])

        if not self.asynchronous:
            self.log.info(
                "Waiting for pipeline %s to reach one of the success states", pipeline_id
            )
            hook.wait_for_pipeline_state(
                success_states=self.success_states,
                pipeline_id=pipeline_id,
                pipeline_name=self.pipeline_name,
                namespace=self.namespace,
                instance_url=api_url,
                timeout=self.pipeline_timeout,
            )
            self.log.info("Job %s discover success state.", pipeline_id)
        return pipeline_id
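Example #5 submits a pipeline run and, unless asynchronous, blocks until the run reaches a success state. A hook-level sketch of that synchronous path with placeholder names; the "COMPLETED" state and the timeout are assumptions, not values taken from the example:

from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook

hook = DataFusionHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
instance = hook.get_instance(
    instance_name="example-instance",  # placeholder
    location="europe-west1",           # placeholder
    project_id="example-project",      # placeholder
)
api_url = instance["apiEndpoint"]
pipeline_id = hook.start_pipeline(
    pipeline_name="example-pipeline",  # placeholder
    instance_url=api_url,
    namespace="default",               # placeholder CDAP namespace
    runtime_args=None,                 # or a dict of CDAP runtime arguments
)
hook.wait_for_pipeline_state(
    success_states=["COMPLETED"],      # assumed terminal CDAP run state
    pipeline_id=pipeline_id,
    pipeline_name="example-pipeline",  # placeholder
    namespace="default",               # placeholder
    instance_url=api_url,
    timeout=10 * 60,                   # assumed 10-minute timeout, in seconds
)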
Example #6
    def poke(self, context: dict) -> bool:
        self.log.info(
            "Waiting for pipeline %s to be in one of the states: %s.",
            self.pipeline_id,
            ", ".join(self.expected_statuses),
        )
        hook = DataFusionHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )

        instance = hook.get_instance(
            instance_name=self.instance_name,
            location=self.location,
            project_id=self.project_id,
        )
        api_url = instance["apiEndpoint"]
        pipeline_status = None
        try:
            pipeline_workflow = hook.get_pipeline_workflow(
                pipeline_name=self.pipeline_name,
                instance_url=api_url,
                pipeline_id=self.pipeline_id,
                namespace=self.namespace,
            )
            pipeline_status = pipeline_workflow["status"]
        except AirflowException:
            pass  # The pipeline may not be visible in the system yet

        if self.failure_statuses and pipeline_status in self.failure_statuses:
            raise AirflowException(
                f"Pipeline with id '{self.pipeline_id}' state is: {pipeline_status}. "
                f"Terminating sensor...")

        self.log.debug("Current status of the pipeline workflow for %s: %s.",
                       self.pipeline_id, pipeline_status)
        return pipeline_status in self.expected_statuses
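Example #6 is a sensor poke: one non-blocking status check per call, with failure states turning into an exception. Outside a sensor, the same check can be run as a small polling loop. A sketch with placeholder names; the run states and the 30-second interval are assumptions about CDAP, not values taken from the example:

import time

from airflow.exceptions import AirflowException
from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook

hook = DataFusionHook(gcp_conn_id="google_cloud_default")  # placeholder connection id
instance = hook.get_instance(
    instance_name="example-instance",  # placeholder
    location="europe-west1",           # placeholder
    project_id="example-project",      # placeholder
)
api_url = instance["apiEndpoint"]

expected, failed = {"COMPLETED"}, {"FAILED", "KILLED"}  # assumed CDAP run states
while True:
    try:
        workflow = hook.get_pipeline_workflow(
            pipeline_name="example-pipeline",  # placeholder
            instance_url=api_url,
            pipeline_id="example-run-id",      # placeholder run id from start_pipeline
            namespace="default",               # placeholder CDAP namespace
        )
        status = workflow["status"]
    except AirflowException:
        status = None  # the run may not be visible yet
    if status in failed:
        raise AirflowException(f"Pipeline run ended in state {status}")
    if status in expected:
        break
    time.sleep(30)  # assumed polling interval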