def before_pipeline_run(
    self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
) -> None:
    """Hook to be invoked before a pipeline runs.

    Args:
        run_params: The params needed for the given run.
            Should be identical to the data logged by Journal.
            # @fixme: this needs to be modelled explicitly as code,
            # instead of comment
            Schema: {
                "run_id": str,
                "project_path": str,
                "env": str,
                "kedro_version": str,
                "tags": Optional[List[str]],
                "from_nodes": Optional[List[str]],
                "to_nodes": Optional[List[str]],
                "node_names": Optional[List[str]],
                "from_inputs": Optional[List[str]],
                "load_versions": Optional[List[str]],
                "pipeline_name": str,
                "extra_params": Optional[Dict[str, Any]],
            }
        pipeline: The ``Pipeline`` that will be run.
        catalog: The ``DataCatalog`` to be used during the run.
    """
    self.context = load_context(
        project_path=run_params["project_path"],
        env=run_params["env"],
        extra_params=run_params["extra_params"],
    )
    mlflow_conf = get_mlflow_config(self.context)
    mlflow_conf.setup(self.context)

    run_name = (
        mlflow_conf.run_opts["name"]
        if mlflow_conf.run_opts["name"] is not None
        else run_params["pipeline_name"]
    )
    mlflow.start_run(
        run_id=mlflow_conf.run_opts["id"],
        experiment_id=mlflow_conf.experiment.experiment_id,
        run_name=run_name,
        nested=mlflow_conf.run_opts["nested"],
    )
    # Set tags only for run parameters that have values.
    mlflow.set_tags({k: v for k, v in run_params.items() if v})
    # Add the git sha manually for consistency with the journal.
    # TODO: this does not take uncommitted files into account, so it
    # does not ensure reproducibility. Define what to do.
    mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
    mlflow.set_tag(
        "kedro_command",
        _generate_kedro_command(
            tags=run_params["tags"],
            node_names=run_params["node_names"],
            from_nodes=run_params["from_nodes"],
            to_nodes=run_params["to_nodes"],
            from_inputs=run_params["from_inputs"],
            load_versions=run_params["load_versions"],
            pipeline_name=run_params["pipeline_name"],
        ),
    )
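# The hook above tags every MLflow run with _git_sha(project_path), and the two
# tests below exercise that helper. Here is a minimal sketch of what _git_sha
# could look like, consistent with what the tests assert (the subprocess output
# is decoded to str, and a warning containing "Unable to git describe" is
# logged on failure). The exact git invocation used here
# ("git rev-parse --short HEAD") is an assumption, not necessarily what the
# real helper runs.
import logging
import subprocess
from pathlib import Path
from typing import Optional, Union

logger = logging.getLogger(__name__)


def _git_sha(project_path: Union[str, Path, None] = None) -> Optional[str]:
    """Return a short git identifier for the project repository, or None."""
    project_path = str(project_path or Path.cwd())
    try:
        # cwd points git at the Kedro project, so the sha describes that repo.
        res = subprocess.check_output(
            ["git", "rev-parse", "--short", "HEAD"], cwd=project_path
        )
        return res.decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        logger.warning("Unable to git describe %s", project_path)
        return None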
def test_invalid_git_sha(tmp_path, caplog):
    _git_sha(tmp_path)
    assert "Unable to git describe" in caplog.record_tuples[0][2]
def test_git_sha(tmp_path, mocker):
    mocker.patch("subprocess.check_output", return_value="mocked_return".encode())
    result = _git_sha(tmp_path)
    assert result == "mocked_return"
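# For before_pipeline_run to fire during `kedro run`, the hook class has to be
# registered with Kedro. A registration sketch for a Kedro 0.16.x project (the
# version implied by the load_context call above); MlflowPipelineHook and
# my_project.hooks are hypothetical names standing in for wherever the hook
# above is actually defined, and its methods are assumed to be decorated with
# kedro.framework.hooks.hook_impl so that Kedro's plugin manager picks them up.
from kedro.framework.context import KedroContext

from my_project.hooks import MlflowPipelineHook  # hypothetical import path


class ProjectContext(KedroContext):
    """Project context that wires the MLflow pipeline hook into every run."""

    project_name = "my_project"
    project_version = "0.16.6"  # exact attributes depend on the Kedro version
    package_name = "my_project"

    hooks = (MlflowPipelineHook(),)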