Example no. 1
0
    def after_pipeline_run(
        self,
        run_params: Dict[str, Any],
        pipeline: Pipeline,
        catalog: DataCatalog,
    ) -> None:
        """Hook invoked once a pipeline has finished running.

        If mlflow tracking is enabled and the pipeline is a ``PipelineML``,
        the inference pipeline is logged to mlflow as a pyfunc model
        (with its artifacts, conda environment and model signature) and
        the active mlflow run is closed. Otherwise, catalog logging is
        switched back on.

        Args:
            run_params: The params needed for the given run.
                Should be identical to the data logged by Journal.
                # @fixme: this needs to be modelled explicitly as code, instead of comment
                Schema: {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
            pipeline: The ``Pipeline`` that was run.
            catalog: The ``DataCatalog`` used during the run.
        """
        if not self._is_mlflow_enabled:
            switch_catalog_logging(catalog, True)
            return

        if isinstance(pipeline, PipelineML):
            # The temporary folder is deleted when the context manager
            # exits, so the model must be logged to mlflow before then.
            with TemporaryDirectory() as tmp_dir:
                inference_catalog = pipeline._extract_pipeline_catalog(catalog)
                artifacts = pipeline.extract_pipeline_artifacts(
                    inference_catalog, temp_folder=Path(tmp_dir)
                )

                if pipeline.model_signature == "auto":
                    # Infer the signature from the pipeline's input dataset.
                    sample_input = inference_catalog.load(pipeline.input_name)
                    signature = infer_signature(model_input=sample_input)
                else:
                    signature = pipeline.model_signature

                mlflow.pyfunc.log_model(
                    artifact_path=pipeline.model_name,
                    python_model=KedroPipelineModel(
                        pipeline_ml=pipeline,
                        catalog=inference_catalog,
                        **pipeline.kwargs,
                    ),
                    artifacts=artifacts,
                    conda_env=_format_conda_env(pipeline.conda_env),
                    signature=signature,
                )

        # Close the mlflow active run at the end of the pipeline to avoid
        # interactions with further runs.
        mlflow.end_run()
Example no. 2
0
    def after_pipeline_run(
        self,
        run_params: Dict[str, Any],
        pipeline: Pipeline,
        catalog: DataCatalog,
    ) -> None:
        """Hook invoked once a pipeline has finished running.

        If the pipeline is a ``PipelineML``, its inference pipeline is
        logged to mlflow as a pyfunc model with the associated artifacts
        and conda environment. The active mlflow run is always closed.

        Args:
            run_params: The params needed for the given run.
                Should be identical to the data logged by Journal.
                # @fixme: this needs to be modelled explicitly as code, instead of comment
                Schema: {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
            pipeline: The ``Pipeline`` that was run.
            catalog: The ``DataCatalog`` used during the run.
        """
        if isinstance(pipeline, PipelineML):
            inference_catalog = pipeline.extract_pipeline_catalog(catalog)
            mlflow.pyfunc.log_model(
                artifact_path=self.model_name,
                python_model=KedroPipelineModel(
                    pipeline_ml=pipeline,
                    catalog=inference_catalog,
                ),
                artifacts=pipeline.extract_pipeline_artifacts(inference_catalog),
                conda_env=self.conda_env,
            )
        # Close the mlflow active run at the end of the pipeline to avoid
        # interactions with further runs.
        mlflow.end_run()