def test_toml_invalid_format(self, tmp_path):
    """Check that an unparsable ``pyproject.toml`` raises ``RuntimeError``."""
    # "!!" is not valid TOML, so parsing the file is expected to fail.
    (tmp_path / "pyproject.toml").write_text("!!")
    expected_message = "Failed to parse 'pyproject.toml' file"

    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        _get_project_metadata(str(tmp_path))
def test_no_config_files(self, mocker):
    """Check the error raised when ``Path.is_file`` reports no file at all."""
    # Every ``Path.is_file`` call answers False for the duration of the test.
    mocker.patch.object(Path, "is_file", return_value=False)

    expected_message = (
        "Could not find the project configuration file 'pyproject.toml' "
        f"in {self.project_path}"
    )
    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        _get_project_metadata(self.project_path)
def test_toml_file_without_kedro_section(self, mocker):
    """Check the error raised when the loaded configuration is empty."""
    mocker.patch.object(Path, "is_file", return_value=True)
    # The loader returns an empty mapping, hence no ``tool.kedro`` entry.
    mocker.patch("anyconfig.load", return_value={})

    expected_message = "There's no '[tool.kedro]' section in the 'pyproject.toml'."
    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        _get_project_metadata(self.project_path)
def test_toml_file_has_missing_mandatory_keys(self, mocker):
    """Check the error raised when required keys are absent from the config."""
    # Neither 'package_name' nor 'project_name' is provided here.
    kedro_section = {"project_version": kedro_version, "unexpected_key": "hello"}

    mocker.patch.object(Path, "is_file", return_value=True)
    mocker.patch("anyconfig.load", return_value={"tool": {"kedro": kedro_section}})

    expected_message = (
        "Missing required keys ['package_name', 'project_name'] "
        "from 'pyproject.toml'."
    )
    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        _get_project_metadata(self.project_path)
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    """Check that ``setup`` exposes an experiment that was created then deleted."""
    project_path = kedro_project_with_mlflow_conf

    # Create the "exp1" experiment in the project's "mlruns" store, then delete it.
    client = MlflowClient((project_path / "mlruns").as_uri())
    client.create_experiment("exp1")
    deleted_experiment_id = client.get_experiment_by_name("exp1").experiment_id
    client.delete_experiment(deleted_experiment_id)

    # The configuration targets the same experiment name and must bring it back.
    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts={"name": "exp1"},
    )

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        experiment_names = [
            experiment.name for experiment in config.mlflow_client.list_experiments()
        ]
        assert "exp1" in experiment_names
def test_mlflow_pipeline_hook_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    """Check the copy mode of the datasets stored with the logged model.

    A pipeline built by ``pipeline_ml_factory`` with ``copy_mode`` is run
    between the pipeline hook calls; the logged model is then loaded back and
    the ``_copy_mode`` of every dataset of its catalog is compared with
    ``expected``.
    """
    project_path = kedro_project_with_mlflow_conf
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name, project_path=project_path
    ):
        hook = MlflowPipelineHook()
        runner = SequentialRunner()

        # Only ``catalog`` and ``run_id`` carry real values: the hook does not
        # use the other arguments, so they are given empty placeholders.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            conda_env={},
            model_name=dummy_pipeline_ml.model_name,
            copy_mode=copy_mode,
        )

        # Both pipeline-level hook calls receive the very same arguments.
        hook_kwargs = {
            "run_params": dummy_run_params,
            "pipeline": pipeline_to_run,
            "catalog": dummy_catalog,
        }
        hook.before_pipeline_run(**hook_kwargs)
        runner.run(pipeline_to_run, dummy_catalog)
        # The run id must be read while the run is still the active one.
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(**hook_kwargs)

        mlflow.set_tracking_uri((project_path / "mlruns").as_uri())
        loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")

        logged_catalog = loaded_model._model_impl.python_model.loaded_catalog
        actual_copy_mode = {
            name: dataset._copy_mode
            for name, dataset in logged_catalog._data_sets.items()
        }
        assert actual_copy_mode == expected
def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):
    """Check that the node hook picks up its options from ``mlflow.yml``.

    A ``mlflow.yml`` with custom ``hooks.node`` options is written to the
    project's ``conf/local`` folder; after ``before_pipeline_run`` the hook
    attributes must reflect these options.
    """
    # NOTE: the call is a plain statement; the original had a stray trailing
    # comma after it, which built (and discarded) a one-element tuple.
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(
            hooks=dict(
                node=dict(flatten_dict_params=True, recursive=False, sep="-")
            ),
        ),
    )

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)

    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project,
    ):
        mlflow_node_hook = MlflowNodeHook()
        mlflow_node_hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=dummy_pipeline,
            catalog=dummy_catalog,
        )

        assert (
            mlflow_node_hook.flatten,
            mlflow_node_hook.recursive,
            mlflow_node_hook.sep,
        ) == (True, False, "-")
def ui(env, port, host):
    """Opens the mlflow user interface with the
    project-specific settings of mlflow.yml. This interface
    enables to browse and compares runs.
    """
    # The current working directory is taken as the project root.
    cwd = Path().cwd()
    metadata = _get_project_metadata(cwd)
    _add_src_to_path(metadata.source_dir, cwd)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name,
        project_path=cwd,
        env=env,
    ):
        mlflow_conf = get_mlflow_config()

        # Explicit arguments win; the configured ui options are the fallback.
        if not host:
            host = mlflow_conf.ui_opts.get("host")
        if not port:
            port = mlflow_conf.ui_opts.get("port")

        # call mlflow ui with specific options
        # TODO : add more options for ui
        command = ["mlflow", "ui"]
        command += ["--backend-store-uri", mlflow_conf.mlflow_tracking_uri]
        command += ["--host", host]
        command += ["--port", port]
        subprocess.call(command)
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Check that the explicitly configured tracking uri takes priority.

    The credentials file declares ``mlflow_tracking_uri="mlruns2"`` while the
    configuration object is built with ``mlflow_tracking_uri="mlruns1"``;
    after ``setup`` the active tracking uri must point to ``mlruns1``.
    """
    project_path = kedro_project_with_mlflow_conf

    # Declare a competing tracking uri through the project credentials.
    credentials = {"my_mlflow_creds": {"mlflow_tracking_uri": "mlruns2"}}
    credentials_path = project_path / "conf/base/credentials.yml"
    credentials_path.write_text(yaml.dump(credentials))

    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        expected_uri = (project_path / "mlruns1").as_uri()
        assert mlflow.get_tracking_uri() == expected_uri
def main():  # pragma: no cover
    """Run the command line interface.

    Global commands are always collected. When the current working directory
    is a project, the project entry points and the ``cli`` group of the
    project's own ``<package_name>.cli`` module are added before the whole
    collection is invoked.

    Raises:
        KedroCliError: If importing ``<package_name>.cli`` or reading its
            ``cli`` attribute fails.
    """
    _init_plugins()

    global_groups = [cli, *load_entry_points("global")]
    project_groups = []
    cli_context = {}

    cwd = Path.cwd()
    if _is_project(cwd):
        # load project commands from cli.py
        metadata = _get_project_metadata(cwd)
        package_name = metadata.package_name
        cli_context = {"obj": metadata}
        # The source directory must be importable before loading the
        # project's entry points and its cli module.
        _add_src_to_path(metadata.source_dir, cwd)
        project_groups.extend(load_entry_points("project"))

        try:
            project_cli = importlib.import_module(f"{package_name}.cli")
            project_groups.append(project_cli.cli)
        except Exception as exc:
            raise KedroCliError(
                f"Cannot load commands from {package_name}.cli"
            ) from exc

    command_collection = CommandCollection(
        ("Global commands", global_groups),
        ("Project specific commands", project_groups),
    )
    command_collection(**cli_context)
def test_mlflow_config_with_templated_config_loader(
    kedro_project_with_tcl,
):
    """Check that ``${...}`` placeholders of ``mlflow.yml`` are resolved.

    ``mlflow.yml`` refers to ``${mlflow_tracking_uri}`` and ``globals.yml``
    gives this key the value ``dynamic_mlruns``; the loaded configuration
    must expose the resolved uri.
    """
    local_conf_dir = kedro_project_with_tcl / "conf" / "local"

    mlflow_yml_content = {
        "mlflow_tracking_uri": "${mlflow_tracking_uri}",
        "credentials": None,
        "disable_tracking": {"pipelines": ["my_disabled_pipeline"]},
        "experiment": {"name": "fake_package", "create": True},
        "run": {"id": "123456789", "name": "my_run", "nested": True},
        "ui": {"port": "5151", "host": "localhost"},
        "hooks": {
            "node": {
                "flatten_dict_params": True,
                "recursive": False,
                "sep": "-",
                "long_parameters_strategy": "truncate",
            }
        },
    }
    _write_yaml(local_conf_dir / "mlflow.yml", mlflow_yml_content)

    # Value substituted for the ``${mlflow_tracking_uri}`` placeholder.
    _write_yaml(
        local_conf_dir / "globals.yml",
        {"mlflow_tracking_uri": "dynamic_mlruns"},
    )

    expected = {
        "mlflow_tracking_uri": (kedro_project_with_tcl / "dynamic_mlruns").as_uri(),
        "credentials": None,
        "disable_tracking": {"pipelines": ["my_disabled_pipeline"]},
        "experiments": {"name": "fake_package", "create": True},
        "run": {"id": "123456789", "name": "my_run", "nested": True},
        "ui": {"port": "5151", "host": "localhost"},
        "hooks": {
            "node": {
                "flatten_dict_params": True,
                "recursive": False,
                "sep": "-",
                "long_parameters_strategy": "truncate",
            }
        },
    }

    metadata = _get_project_metadata(kedro_project_with_tcl)
    _add_src_to_path(metadata.source_dir, kedro_project_with_tcl)
    configure_project(metadata.package_name)

    with KedroSession.create(metadata.package_name, kedro_project_with_tcl):
        actual = get_mlflow_config().to_dict()
        assert actual == expected
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables.

    Unregisters every plugin of the hook manager, drops the cached modules
    whose name starts with the project package name, creates and activates a
    new session, rebinds the ``session``, ``context`` and ``catalog`` globals
    and registers the collected line magics.

    Args:
        path: Project path; the module-level ``project_path`` is used when
            this value is falsy.
        line: Not used by this function.

    Raises:
        ImportError: If the Kedro modules cannot be imported.
        Exception: Any error raised while reloading is stored in the
            ``startup_error`` global, logged and raised again.
    """
    global startup_error, context, catalog, session

    try:
        import kedro.config.default_logger
        from kedro.framework.hooks import get_hook_manager
        from kedro.framework.project import configure_project
        from kedro.framework.session import KedroSession
        from kedro.framework.session.session import _activate_session
        from kedro.framework.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path

        # clear hook manager
        manager = get_hook_manager()
        for plugin_name, plugin in manager.list_name_plugin():
            manager.unregister(name=plugin_name, plugin=plugin)

        # remove cached user modules
        metadata = _get_project_metadata(path)
        package_name = metadata.package_name
        stale_modules = [
            module_name
            for module_name in sys.modules
            if module_name.startswith(package_name)
        ]
        # Entries are deleted rather than reloaded: with `reload()`, a name
        # defined by the old version of a module but not by the new one
        # would keep its old definition.
        for module_name in stale_modules:
            del sys.modules[module_name]

        configure_project(package_name)
        session = KedroSession.create(package_name, path)
        _activate_session(session, force=True)
        logging.debug("Loading the context from %s", str(path))
        context = session.load_context()
        catalog = context.catalog

        logging.info("** Kedro project %s", str(metadata.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for magic in collect_line_magic():
            register_line_magic(needs_local_scope(magic))
            logging.info("Registered line magic `%s`", magic.__name__)
    except Exception as err:
        # Keep the failure available to the caller through the global.
        startup_error = err
        logging.exception(
            "Kedro's ipython session startup script failed:\n%s", str(err)
        )
        raise err
def test_node_hook_logging_above_limit_tag_strategy(
    kedro_project, dummy_run_params, param_length
):
    """Check the ``tag`` strategy of the node hook for long parameters.

    With ``long_parameters_strategy="tag"``, the run must end up with no
    logged parameter and with ``my_param`` stored as a tag.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        {"hooks": {"node": {"long_parameters_strategy": "tag"}}},
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    node_hook = MlflowNodeHook()

    long_value = param_length * "a"
    node_inputs = {"params:my_param": long_value}

    metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(metadata.source_dir, kedro_project)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name,
        project_path=kedro_project,
    ):
        with mlflow.start_run():
            node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )

            # IMPORTANT: exceeding the parameter length limit should raise an
            # error for every mlflow backend, but it does not with the
            # FileStore backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # The tests rely on FileStore for simplicity, so logging works
            # there; failure has been enforced instead (which is slightly
            # different from mlflow behaviour).
            node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            # Read the id while the run is still the active one.
            run_id = mlflow.active_run().info.run_id

        logged_run = MlflowClient(tracking_uri).get_run(run_id)
        assert logged_run.data.params == {}

        # Tags whose key starts with "mlflow" are left out of the comparison.
        user_tags = {
            key: value
            for key, value in logged_run.data.tags.items()
            if not key.startswith("mlflow")
        }
        assert user_tags == {"my_param": long_value}
def _load_project(project_path):  # pragma: no cover
    """Configure the project found at ``project_path`` and return its metadata.

    Returns ``None`` when ``_is_project`` reports that ``project_path`` is
    not a project.
    """
    # TODO: This one can potentially become project bootstrap and will be
    # tested there
    if _is_project(project_path):
        project_metadata = _get_project_metadata(project_path)
        _add_src_to_path(project_metadata.source_dir, project_path)
        configure_project(project_metadata.package_name)
        return project_metadata
    return None
def _call_viz(
    host=None,
    port=None,
    browser=None,
    load_file=None,
    save_file=None,
    pipeline_name=None,
    env=None,
    project_path=None,
):
    """Prepare the visualisation data, then save it or serve it.

    The data is read from ``load_file`` when given; otherwise it is built
    from the pipelines of the project located at ``project_path`` (current
    working directory by default). The result is written to ``save_file``
    as JSON when given; otherwise the application is started on
    ``host``/``port``.

    Args:
        host: Host the application is served on.
        port: Port the application is served on.
        browser: When truthy and ``host`` is a local address, a browser tab
            is opened on the served page.
        load_file: File to load the data from instead of a project.
        save_file: File to write the data to instead of serving it.
        pipeline_name: Forwarded to ``_get_pipelines_from_context``.
        env: Environment forwarded to the session or context.
        project_path: Project location; defaults to the current directory.

    Raises:
        KedroCliError: If loading the project raises ``KedroContextError``.
    """
    global _DATA  # pylint: disable=global-statement,invalid-name
    global _CATALOG  # pylint: disable=global-statement

    if load_file:
        # Remove all handlers for root logger
        root_logger = logging.getLogger()
        root_logger.handlers = []

        _DATA = _load_from_file(load_file)
    else:
        try:
            project_path = project_path or Path.cwd()

            if KEDRO_VERSION.match(">=0.17.0"):  # pragma: no cover
                from kedro.framework.session import KedroSession
                from kedro.framework.startup import (  # pylint: disable=no-name-in-module,import-error
                    _get_project_metadata,
                )

                package_name = _get_project_metadata(project_path).package_name
                session_kwargs = dict(
                    package_name=package_name,
                    project_path=project_path,
                    env=env,
                    save_on_close=False,
                )
                session = KedroSession.create(  # pylint: disable=unexpected-keyword-arg
                    **session_kwargs
                )
                context = session.load_context()  # pylint: disable=no-member
                pipelines = _get_pipelines_from_context(context, pipeline_name)
            else:  # pragma: no cover
                context = load_context(project_path=project_path, env=env)
                pipelines = _get_pipelines_from_context(context, pipeline_name)
        except KedroContextError as exc:
            # Chain explicitly so the original failure is reported as the
            # direct cause of the CLI error.
            raise KedroCliError(ERROR_PROJECT_ROOT) from exc  # pragma: no cover

        _CATALOG = context.catalog
        _DATA = format_pipelines_data(pipelines)

    if save_file:
        Path(save_file).write_text(json.dumps(_DATA, indent=4, sort_keys=True))
    else:
        is_localhost = host in ("127.0.0.1", "localhost", "0.0.0.0")
        if browser and is_localhost:
            webbrowser.open_new("http://{}:{:d}/".format(host, port))
        app.run(host=host, port=port)
def test_invalid_version(self, invalid_version, mocker):
    """Check the error raised when the project version is ``invalid_version``."""
    kedro_section = {
        "source_dir": "source_dir",
        "package_name": "fake_package_name",
        "project_name": "fake_project_name",
        "project_version": invalid_version,
    }

    mocker.patch.object(Path, "is_file", return_value=True)
    mocker.patch("anyconfig.load", return_value={"tool": {"kedro": kedro_section}})

    expected_message = (
        f"Your Kedro project version {invalid_version} does not match "
        f"Kedro package version {kedro_version} you are running."
    )
    with pytest.raises(ValueError, match=re.escape(expected_message)):
        _get_project_metadata(self.project_path)
def test_toml_file_with_extra_keys(self, mocker):
    """Check the error raised when the config holds an unexpected key."""
    kedro_section = {
        "package_name": "fake_package_name",
        "project_name": "fake_project_name",
        "project_version": kedro_version,
        "unexpected_key": "hello",  # the offending entry
    }

    mocker.patch.object(Path, "is_file", return_value=True)
    mocker.patch("anyconfig.load", return_value={"tool": {"kedro": kedro_section}})

    expected_message = (
        "Found unexpected keys in 'pyproject.toml'. Make sure it "
        "only contains the following keys: ['package_name', "
        "'project_name', 'project_version', 'source_dir']."
    )
    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        _get_project_metadata(self.project_path)
def test_get_mlflow_config_in_uninitialized_project(kedro_project):
    """Check that ``get_mlflow_config`` fails when ``mlflow.yml`` is missing.

    NOTE(review): this relies on the ``kedro_project`` fixture providing a
    project without any ``mlflow.yml`` file -- confirm against conftest.
    """
    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)

    with KedroSession.create(project_metadata.package_name, kedro_project):
        # Only the call under test sits inside ``pytest.raises`` so that an
        # error raised by the project setup above cannot satisfy the check.
        with pytest.raises(
            KedroMlflowConfigError,
            match="No 'mlflow.yml' config file found in environment",
        ):
            get_mlflow_config()
def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    """Check the signature stored in the metadata of the logged model.

    A pipeline built by ``pipeline_ml_factory`` with ``model_signature`` is
    run between the pipeline hook calls; the signature of the model loaded
    back from the run must equal ``expected_signature``.
    """
    project_path = kedro_project_with_mlflow_conf
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name, project_path=project_path
    ):
        hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            conda_env=env_from_dict,
            model_name="model",
            model_signature=model_signature,
        )

        # Only ``catalog`` and ``run_id`` carry real values: the hook does not
        # use the other arguments, so they are given empty placeholders.
        hook.after_catalog_created(
            catalog=dummy_catalog,
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        # Both pipeline-level hook calls receive the very same arguments.
        hook_kwargs = {
            "run_params": dummy_run_params,
            "pipeline": pipeline_to_run,
            "catalog": dummy_catalog,
        }
        hook.before_pipeline_run(**hook_kwargs)
        runner.run(pipeline_to_run, dummy_catalog)
        # The run id must be read while the run is still the active one.
        run_id = mlflow.active_run().info.run_id
        hook.after_pipeline_run(**hook_kwargs)

        # Load the logged model back and inspect its signature.
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        actual_signature = trained_model.metadata.signature
        assert actual_signature == expected_signature
def init(env, force, silent):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    This adds "conf/base/mlflow.yml": This is a configuration file
    used for run parametrization when calling "kedro run" command.
    See INSERT_DOC_URL for further details.
    """
    # NOTE: the docstring above is kept verbatim since it may be displayed
    # as command help.
    #
    # Behaviour, as implemented below:
    #   * env: name of the configuration sub-folder receiving "mlflow.yml".
    #   * force: when truthy, an existing "mlflow.yml" is overwritten.
    #   * silent: when truthy, the success message is not printed.

    # get constants
    mlflow_yml = "mlflow.yml"
    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    session = KedroSession.create(
        project_metadata.package_name, project_path=project_path
    )
    context = session.load_context()
    mlflow_yml_path = project_path / context.CONF_ROOT / env / mlflow_yml

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    if mlflow_yml_path.is_file() and not force:
        click.secho(
            click.style(
                f"A 'mlflow.yml' already exists at '{mlflow_yml_path}' You can use the ``--force`` option to override it.",
                fg="red",
            )
        )
        return

    try:
        write_jinja_template(
            src=TEMPLATE_FOLDER_PATH / mlflow_yml,
            is_cookiecutter=False,
            dst=mlflow_yml_path,
            python_package=project_metadata.package_name,
        )
    except FileNotFoundError:
        click.secho(
            click.style(
                f"No env '{env}' found. Please check this folder exists inside '{context.CONF_ROOT}' folder.",
                fg="red",
            )
        )
    else:
        # Report success only when the template was actually written; the
        # message used to be printed even after the failure branch above.
        if not silent:
            click.secho(
                click.style(
                    f"'{context.CONF_ROOT}/{env}/{mlflow_yml}' successfully updated.",
                    fg="green",
                )
            )
def load_context(project_path: Union[str, Path], **kwargs) -> KedroContext:
    """Build the ``KedroContext`` of the Kedro project at ``project_path``.

    Deprecated: a ``DeprecationWarning`` is emitted on every call, pointing to
    ``KedroSession.load_context`` as the replacement.

    The project is expected to be laid out as follows::

        <project_path>
            |__ <src_dir>
            |__ pyproject.toml

    ``<src_dir>`` is named `src` by default. The project metadata is read
    from `pyproject.toml`, where the Kedro configuration sits under the
    `[tool.kedro]` section.

    Args:
        project_path: Path to the Kedro project.
        kwargs: Optional kwargs for ``KedroContext`` class. A falsy or missing
            ``env`` is replaced by the ``KEDRO_ENV`` environment variable.

    Returns:
        Instance of ``KedroContext`` class defined in Kedro project.

    Raises:
        KedroContextError: `pyproject.toml` was not found or the `[tool.kedro]`
            section is missing, or loaded context has package conflict.

    """
    warn(
        "`kedro.framework.context.load_context` is now deprecated in favour of "
        "`KedroSession.load_context` and will be removed in Kedro 0.18.0.",
        DeprecationWarning,
    )

    resolved_path = Path(project_path).expanduser().resolve()
    metadata = _get_project_metadata(resolved_path)
    package_name = metadata.package_name

    context_class = _get_project_settings(package_name, "CONTEXT_CLASS", KedroContext)

    # Some CLI commands (e.g `kedro run`) pass `env=None` explicitly, so the
    # environment variable is the fallback for any falsy value (the result is
    # None when the variable is not set either).
    env = kwargs.get("env") or os.getenv("KEDRO_ENV")
    kwargs["env"] = env

    return context_class(
        package_name=package_name, project_path=resolved_path, **kwargs
    )
def test_on_pipeline_error(kedro_project_with_mlflow_conf):
    """Check that a failing pipeline closes its mlflow run and marks it failed."""
    project_path = kedro_project_with_mlflow_conf
    mlruns_uri = (project_path / "mlruns").as_uri()

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name,
        project_path=project_path,
    ):

        def failing_node():
            # Open a nested run, then fail while it is still open.
            mlflow.start_run(nested=True)
            raise ValueError("Let's make this pipeline fail")

        class DummyContextWithHook(KedroContext):
            """Context whose only pipeline is made of the failing node."""

            project_name = "fake project"
            package_name = "fake_project"
            project_version = "0.16.5"

            hooks = (MlflowPipelineHook(),)

            def _get_pipeline(self, name: str = None) -> Pipeline:
                failing = node(
                    func=failing_node,
                    inputs=None,
                    outputs="fake_output",
                )
                return Pipeline([failing])

        with pytest.raises(ValueError):
            dummy_context = DummyContextWithHook(
                "fake_package", project_path.as_posix()
            )
            dummy_context.run()

        # the run we want is the last one in Default experiment
        last_run_info = MlflowClient(mlruns_uri).list_run_infos("0")[0]

        # the run must have been closed
        assert mlflow.active_run() is None
        # it must be marked as failed
        failed_status = RunStatus.to_string(RunStatus.FAILED)
        assert last_run_info.status == failed_status
def test_source_dir_specified_in_toml(self, mocker):
    """Check that a configured ``source_dir`` is resolved under the project path."""
    custom_source_dir = "test_dir"
    kedro_section = {
        "source_dir": custom_source_dir,
        "package_name": "fake_package_name",
        "project_name": "fake_project_name",
        "project_version": kedro_version,
    }

    mocker.patch.object(Path, "is_file", return_value=True)
    mocker.patch("anyconfig.load", return_value={"tool": {"kedro": kedro_section}})

    metadata = _get_project_metadata(self.project_path)

    assert metadata.source_dir == self.project_path / custom_source_dir
def test_kedro_mlflow_config_new_experiment_does_not_exists(
    kedro_project_with_mlflow_conf,
):
    """Check that ``setup`` makes the configured experiment available.

    No experiment is created beforehand in this test.
    """
    project_path = kedro_project_with_mlflow_conf

    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts={"name": "exp1"},
    )

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        experiment_names = [
            experiment.name for experiment in config.mlflow_client.list_experiments()
        ]
        assert "exp1" in experiment_names
def test_node_hook_logging_above_limit_truncate_strategy(
    kedro_project, dummy_run_params, param_length
):
    """Check the ``truncate`` strategy of the node hook for long parameters.

    With ``long_parameters_strategy="truncate"``, ``my_param`` must be logged
    with its value cut to ``MAX_PARAM_VAL_LENGTH`` characters.
    """
    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        {"hooks": {"node": {"long_parameters_strategy": "truncate"}}},
    )

    tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(tracking_uri)

    node_hook = MlflowNodeHook()

    long_value = param_length * "a"
    node_inputs = {"params:my_param": long_value}

    metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(metadata.source_dir, kedro_project)
    configure_project(metadata.package_name)

    with KedroSession.create(
        package_name=metadata.package_name,
        project_path=kedro_project,
    ):
        with mlflow.start_run():
            node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            # Read the id while the run is still the active one.
            run_id = mlflow.active_run().info.run_id

        logged_run = MlflowClient(tracking_uri).get_run(run_id)
        expected_params = {"my_param": long_value[:MAX_PARAM_VAL_LENGTH]}
        assert logged_run.data.params == expected_params
def load_kedro_objects(path, line=None):  # pylint: disable=unused-argument
    """Line magic which reloads all Kedro default variables.

    Configures the project, clears the hook manager and the cached project
    modules, creates and activates a new session, then exposes ``context``,
    ``catalog`` and ``session`` both as module globals and as IPython
    variables. The ``line_magic`` entry points are registered last.

    Args:
        path: Project path; the module-level ``project_path`` is used when
            this value is falsy.
        line: Not used by this function.
    """
    global context, catalog, session

    import kedro.config.default_logger  # noqa: F401 # pylint: disable=unused-import
    from kedro.framework.cli import load_entry_points
    from kedro.framework.cli.utils import _add_src_to_path
    from kedro.framework.project import configure_project
    from kedro.framework.session import KedroSession
    from kedro.framework.session.session import _activate_session
    from kedro.framework.startup import _get_project_metadata

    path = path or project_path
    metadata = _get_project_metadata(path)
    package_name = metadata.package_name

    _add_src_to_path(metadata.source_dir, path)
    configure_project(package_name)

    _clear_hook_manager()
    _remove_cached_modules(package_name)

    session = KedroSession.create(package_name, path)
    _activate_session(session)
    logging.debug("Loading the context from %s", str(path))
    context = session.load_context()
    catalog = context.catalog

    # Make the freshly created objects available in the interactive namespace.
    exposed_variables = {
        "context": context,
        "catalog": catalog,
        "session": session,
    }
    get_ipython().push(variables=exposed_variables)

    logging.info("** Kedro project %s", str(metadata.project_name))
    logging.info("Defined global variable `context`, `session` and `catalog`")

    for magic in load_entry_points("line_magic"):
        register_line_magic(needs_local_scope(magic))
        logging.info("Registered line magic `%s`", magic.__name__)
def test_kedro_mlflow_config_setup_export_credentials(kedro_project_with_mlflow_conf):
    """Check that ``setup`` exports the referenced credentials to ``os.environ``."""
    project_path = kedro_project_with_mlflow_conf

    # Declare a credentials entry holding a single fake key/value pair.
    credentials = {"my_mlflow_creds": {"fake_mlflow_cred": "my_fake_cred"}}
    credentials_path = project_path / "conf/base/credentials.yml"
    credentials_path.write_text(yaml.dump(credentials))

    # The configuration refers to the credentials entry by name.
    config = KedroMlflowConfig(
        project_path=project_path, credentials="my_mlflow_creds"
    )

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        exported_value = os.environ["fake_mlflow_cred"]
        assert exported_value == "my_fake_cred"
def test_kedro_mlflow_config_experiment_exists(mocker, kedro_project_with_mlflow_conf):
    """Check that ``setup`` copes with an experiment that already exists.

    The ``mocker`` argument is not used; it is kept so that the test
    signature stays unchanged.
    """
    # Create an experiment with the same name in the store the configuration
    # will use. The sibling tests of this file show that
    # ``mlflow_tracking_uri="mlruns"`` resolves to ``<project>/mlruns``; the
    # experiment used to be created under ``conf/local/mlruns``, a different
    # store, so the "already exists" situation was never exercised.
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")

    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)

    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()
        assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
def test_kedro_mlflow_config_setup_set_tracking_uri(kedro_project_with_mlflow_conf):
    """Check that ``setup`` sets the mlflow tracking uri from the configuration."""
    project_path = kedro_project_with_mlflow_conf

    # Expected uri: the configured folder name resolved under the project path.
    expected_uri = (project_path / "awesome_tracking").as_uri()

    config = KedroMlflowConfig(
        project_path=project_path,
        mlflow_tracking_uri="awesome_tracking",
        experiment_opts={"name": "exp1"},
    )

    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)

    with KedroSession.create("fake_project", project_path=project_path):
        config.setup()
        actual_uri = mlflow.get_tracking_uri()
        assert actual_uri == expected_uri
def test_valid_toml_file(self, mocker):
    """Check the metadata built from a configuration holding the required keys."""
    kedro_section = {
        "package_name": "fake_package_name",
        "project_name": "fake_project_name",
        "project_version": kedro_version,
    }

    mocker.patch.object(Path, "is_file", return_value=True)
    mocker.patch("anyconfig.load", return_value={"tool": {"kedro": kedro_section}})

    expected = ProjectMetadata(
        source_dir=self.project_path / "src",  # default
        config_file=self.project_path / "pyproject.toml",
        package_name="fake_package_name",
        project_name="fake_project_name",
        project_version=kedro_version,
        project_path=self.project_path,
    )

    actual = _get_project_metadata(self.project_path)

    assert actual == expected