Example #1
def test_kedro_mlflow_config_setup_tracking_priority(kedro_project_with_mlflow_conf):
    """Test if the mlflow_tracking uri set is the one of mlflow.yml
    if it also eist in credentials.

    Args:
        mocker ([type]): [description]
        tmp_path ([type]): [description]
    """
    # declare a second, conflicting tracking uri in the project credentials

    (kedro_project_with_mlflow_conf / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )

    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert (
        mlflow.get_tracking_uri()
        == (kedro_project_with_mlflow_conf / "mlruns1").as_uri()
    )
Example #2
def main():  # pragma: no cover
    """Main entry point. Look for a ``cli.py``, and, if found, add its
    commands to `kedro`'s before invoking the CLI.
    """
    _init_plugins()

    global_groups = [cli]
    global_groups.extend(load_entry_points("global"))
    project_groups = []
    cli_context = dict()

    path = Path.cwd()
    if _is_project(path):
        # load project commands from cli.py
        metadata = _get_project_metadata(path)
        package_name = metadata.package_name
        cli_context = dict(obj=metadata)
        _add_src_to_path(metadata.source_dir, path)
        configure_project(package_name)

        project_groups.extend(load_entry_points("project"))

        try:
            project_cli = importlib.import_module(f"{package_name}.cli")
            project_groups.append(project_cli.cli)
        except Exception as exc:
            raise KedroCliError(
                f"Cannot load commands from {package_name}.cli"
            ) from exc

    cli_collection = CommandCollection(
        ("Global commands", global_groups),
        ("Project specific commands", project_groups),
    )
    cli_collection(**cli_context)
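For context, load_entry_points("global") above collects click command groups that plugins advertise through setuptools entry points (the "kedro.global_commands" group in Kedro 0.17.x). A minimal sketch of how a hypothetical plugin would register one; the plugin and module names are illustrative, not from the original:

# setup.py of a hypothetical kedro plugin
from setuptools import setup

setup(
    name="kedro-myplugin",
    packages=["kedro_myplugin"],
    entry_points={
        # picked up by load_entry_points("global") when `kedro` starts
        "kedro.global_commands": ["myplugin = kedro_myplugin.cli:commands"]
    },
)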
Example #3
def test_mlflow_pipeline_hook_with_copy_mode(
    kedro_project_with_mlflow_conf,
    dummy_pipeline_ml,
    dummy_catalog,
    dummy_run_params,
    copy_mode,
    expected,
):
    # kedro_project_with_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline_ml.training,
            inference=dummy_pipeline_ml.inference,
            input_name=dummy_pipeline_ml.input_name,
            conda_env={},
            model_name=dummy_pipeline_ml.model_name,
            copy_mode=copy_mode,
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
        mlflow.set_tracking_uri(mlflow_tracking_uri)

        loaded_model = mlflow.pyfunc.load_model(model_uri=f"runs:/{run_id}/model")

        actual_copy_mode = {
            name: ds._copy_mode
            for name, ds in loaded_model._model_impl.python_model.loaded_catalog._data_sets.items()
        }

        assert actual_copy_mode == expected
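The copy_mode and expected arguments are supplied by pytest.mark.parametrize. A hedged sketch of what such a parametrization could look like, assuming the dummy catalog holds "raw_data", "data" and "model" entries (values illustrative, not the original fixture):

import pytest

@pytest.mark.parametrize(
    "copy_mode,expected",
    [
        # a single string applies the same copy mode to every dataset
        ("assign", {"raw_data": "assign", "data": "assign", "model": "assign"}),
        # None lets each dataset fall back to its default copy behaviour
        (None, {"raw_data": None, "data": None, "model": None}),
    ],
)
def test_copy_mode_parametrization(copy_mode, expected):
    # stand-in body: the real test above receives these same arguments
    assert isinstance(expected, dict)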
Example #4
    def test_get_current_session(self, fake_project, mock_package_name):
        assert get_current_session(silent=True) is None  # no sessions yet

        pattern = "There is no active Kedro session"
        with pytest.raises(RuntimeError, match=pattern):
            get_current_session()

        configure_project(mock_package_name)
        session1 = KedroSession.create(mock_package_name, fake_project)
        session2 = KedroSession.create(mock_package_name, fake_project)

        with session1:
            assert get_current_session() is session1

            pattern = (
                "Cannot activate the session as another active session already exists"
            )
            with pytest.raises(RuntimeError, match=pattern), session2:
                pass  # pragma: no cover

        # session has been closed, so no current sessions should be available
        assert get_current_session(silent=True) is None

        with session2:
            assert get_current_session() is session2
Example #5
def test_mlflow_config_with_templated_config_loader(kedro_project_with_tcl):

    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "mlflow.yml",
        dict(
            mlflow_tracking_uri="${mlflow_tracking_uri}",
            credentials=None,
            disable_tracking=dict(pipelines=["my_disabled_pipeline"]),
            experiment=dict(name="fake_package", create=True),
            run=dict(id="123456789", name="my_run", nested=True),
            ui=dict(port="5151", host="localhost"),
            hooks=dict(node=dict(
                flatten_dict_params=True,
                recursive=False,
                sep="-",
                long_parameters_strategy="truncate",
            )),
        ),
    )

    _write_yaml(
        kedro_project_with_tcl / "conf" / "local" / "globals.yml",
        dict(mlflow_tracking_uri="dynamic_mlruns"),
    )

    expected = {
        "mlflow_tracking_uri": (kedro_project_with_tcl / "dynamic_mlruns").as_uri(),
        "credentials": None,
        "disable_tracking": {"pipelines": ["my_disabled_pipeline"]},
        "experiments": {"name": "fake_package", "create": True},
        "run": {"id": "123456789", "name": "my_run", "nested": True},
        "ui": {"port": "5151", "host": "localhost"},
        "hooks": {
            "node": {
                "flatten_dict_params": True,
                "recursive": False,
                "sep": "-",
                "long_parameters_strategy": "truncate",
            }
        },
    }
    project_metadata = _get_project_metadata(kedro_project_with_tcl)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_tcl)
    configure_project(project_metadata.package_name)
    with KedroSession.create(project_metadata.package_name,
                             kedro_project_with_tcl):
        assert get_mlflow_config().to_dict() == expected
Example #6
def run_package():
    # Entry point for running a Kedro project packaged with `kedro package`
    # using `python -m <project_package>.run` command.
    package_name = Path(__file__).resolve().parent.name
    configure_project(package_name)
    with KedroSession.create(package_name) as session:
        session.run()
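run_package is the entry point a packaged project exposes; a hedged sketch of the setup.py wiring behind it, with my_project as a placeholder package name (not from the original):

# setup.py of the packaged project (placeholder names)
from setuptools import find_packages, setup

setup(
    name="my_project",
    packages=find_packages(),
    entry_points={
        # allows running the packaged pipeline via a console command,
        # in addition to `python -m my_project.run`
        "console_scripts": ["my-project = my_project.run:run_package"]
    },
)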
Example #7
    def test_get_current_session(self, fake_project, mocker):
        assert get_current_session(silent=True) is None  # no sessions yet

        pattern = "There is no active Kedro session"
        with pytest.raises(RuntimeError, match=pattern):
            get_current_session()

        mocker.patch("kedro.framework.project._validate_module")
        configure_project(_FAKE_PACKAGE_NAME)
        session1 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project)
        session2 = KedroSession.create(_FAKE_PACKAGE_NAME, fake_project)

        with session1:
            assert get_current_session() is session1

            pattern = (
                "Cannot activate the session as another active session already exists"
            )
            with pytest.raises(RuntimeError, match=pattern), session2:
                pass  # pragma: no cover

        # session has been closed, so no current sessions should be available
        assert get_current_session(silent=True) is None

        with session2:
            assert get_current_session() is session2
Example #8
def test_pipeline_run_hook_getting_configs(
    kedro_project,
    dummy_run_params,
    dummy_pipeline,
    dummy_catalog,
):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(
            hooks=dict(node=dict(flatten_dict_params=True, recursive=False, sep="-"))
        ),
    )

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        mlflow_node_hook = MlflowNodeHook()
        mlflow_node_hook.before_pipeline_run(run_params=dummy_run_params,
                                             pipeline=dummy_pipeline,
                                             catalog=dummy_catalog)

        assert (
            mlflow_node_hook.flatten,
            mlflow_node_hook.recursive,
            mlflow_node_hook.sep,
        ) == (True, False, "-")
Example #9
    def test_register_pipelines_is_called(self, mock_session_with_hooks,
                                          dummy_dataframe, caplog):
        # not sure why this test needs this line here too
        # and not just in the session fixture
        configure_project(MOCK_PACKAGE_NAME)
        context = mock_session_with_hooks.load_context()
        catalog = context.catalog
        catalog.save("cars", dummy_dataframe)
        catalog.save("boats", dummy_dataframe)
        mock_session_with_hooks.run()

        register_pipelines_calls = [
            record for record in caplog.records
            if record.funcName == "register_pipelines"
        ]
        assert len(register_pipelines_calls) == 1
        call_record = register_pipelines_calls[0]
        _assert_hook_call_record_has_expected_parameters(call_record, [])

        expected_pipelines = {
            "__default__": CONTEXT_PIPELINE,
            "de": CONTEXT_PIPELINE,
            "pipe": CONTEXT_PIPELINE,
        }
        assert context.pipelines == expected_pipelines
Example #10
def ui(env, port, host):
    """Opens the mlflow user interface with the
    project-specific settings of mlflow.yml. This interface
    enables to browse and compares runs.
    """

    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=project_path,
        env=env,
    ):

        mlflow_conf = get_mlflow_config()
        host = host or mlflow_conf.ui_opts.get("host")
        port = port or mlflow_conf.ui_opts.get("port")

        # call mlflow ui with specific options
        # TODO : add more options for ui
        subprocess.call(
            [
                "mlflow",
                "ui",
                "--backend-store-uri",
                mlflow_conf.mlflow_tracking_uri,
                "--host",
                host,
                "--port",
                port,
            ]
        )
Example #11
def fake_project_cli(fake_repo_path: Path, dummy_config: Path):
    old_settings = settings.as_dict()
    starter_path = Path(__file__).parents[3].resolve()
    starter_path = starter_path / "features" / "steps" / "test_starter"
    # This is needed just for the tests, those CLI groups are merged in our
    # code when invoking `kedro` but when imported, they still need to be merged
    kedro_cli = click.CommandCollection(name="Kedro",
                                        sources=[cli, create_cli])
    CliRunner().invoke(
        kedro_cli,
        ["new", "-c",
         str(dummy_config), "--starter",
         str(starter_path)],
    )

    # NOTE: Here we load a couple of modules, as they would be imported in
    # the code and tests.
    # It's safe to remove the new entries from path due to the python
    # module caching mechanism. Any `reload` on it will not work though.
    old_path = sys.path.copy()
    sys.path = [str(fake_repo_path / "src")] + sys.path

    import_module(PACKAGE_NAME)
    configure_project(PACKAGE_NAME)
    yield import_module(f"{PACKAGE_NAME}.cli")

    # reset side-effects of configure_project
    pipelines.clear()
    for key, value in old_settings.items():
        settings.set(key, value)
    sys.path = old_path
    del sys.modules[PACKAGE_NAME]
Example #12
def fake_project_cli(
    fake_repo_path: Path, dummy_config: Path, fake_kedro_cli: click.CommandCollection
):
    old_settings = settings.as_dict()
    starter_path = Path(__file__).parents[3].resolve()
    starter_path = starter_path / "features" / "steps" / "test_starter"
    CliRunner().invoke(
        fake_kedro_cli, ["new", "-c", str(dummy_config), "--starter", str(starter_path)]
    )

    # NOTE: Here we load a couple of modules, as they would be imported in
    # the code and tests.
    # It's safe to remove the new entries from path due to the python
    # module caching mechanism. Any `reload` on it will not work though.
    old_path = sys.path.copy()
    sys.path = [str(fake_repo_path / "src")] + sys.path

    import_module(PACKAGE_NAME)
    configure_project(PACKAGE_NAME)
    yield fake_kedro_cli

    # reset side-effects of configure_project
    pipelines._clear(PACKAGE_NAME)  # this resets pipelines loading state
    for key, value in old_settings.items():
        settings.set(key, value)
    sys.path = old_path
    del sys.modules[PACKAGE_NAME]
Example #13
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #14
def bootstrap_project(project_path: Path) -> ProjectMetadata:
    """Run setup required at the beginning of the workflow
    when running in project mode, and return project metadata.
    """
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)
    return metadata
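bootstrap_project condenses the _get_project_metadata / _add_src_to_path / configure_project sequence that the test examples above repeat inline. A minimal usage sketch, assuming the current working directory is a Kedro project root:

from pathlib import Path

from kedro.framework.session import KedroSession

project_path = Path.cwd()  # assumed to be a Kedro project root
metadata = bootstrap_project(project_path)
with KedroSession.create(metadata.package_name, project_path) as session:
    session.run()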
Example #15
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    global session

    try:
        import kedro.config.default_logger
        from kedro.framework.hooks import get_hook_manager
        from kedro.framework.project import configure_project
        from kedro.framework.session import KedroSession
        from kedro.framework.session.session import _activate_session
        from kedro.framework.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path

        # clear hook manager
        hook_manager = get_hook_manager()
        name_plugin_pairs = hook_manager.list_name_plugin()
        for name, plugin in name_plugin_pairs:
            hook_manager.unregister(name=name, plugin=plugin)

        # remove cached user modules
        metadata = _get_project_metadata(path)
        to_remove = [
            mod for mod in sys.modules if mod.startswith(metadata.package_name)
        ]
        # `del` is used instead of `reload()` because, if the new version of a
        # module does not define a name that was defined by the old version,
        # the old definition would otherwise remain.
        for module in to_remove:
            del sys.modules[module]

        configure_project(metadata.package_name)
        session = KedroSession.create(metadata.package_name, path)
        _activate_session(session, force=True)
        logging.debug("Loading the context from %s", str(path))
        context = session.load_context()
        catalog = context.catalog

        logging.info("** Kedro project %s", str(metadata.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(needs_local_scope(line_magic))
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception("Kedro's ipython session startup script failed:\n%s",
                          str(err))
        raise err
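The `del sys.modules[...]` comment above is worth a short illustration of the reload() pitfall it avoids (a self-contained sketch; the module name demo_pkg is hypothetical):

import sys
import types

# simulate an already-imported module that defines `old_name`
demo = types.ModuleType("demo_pkg")
demo.old_name = 1
sys.modules["demo_pkg"] = demo

# importlib.reload() re-executes the source in the *same* module object, so a
# name dropped from the new source would survive as a stale attribute.
# Deleting the cache entry instead makes the next `import` build a fresh
# module object with no leftovers.
del sys.modules["demo_pkg"]
assert "demo_pkg" not in sys.modules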
Example #16
def _bootstrap_subprocess(package_name: str, conf_logging: Dict[str, Any]):
    # pylint: disable=import-outside-toplevel,protected-access,cyclic-import
    from kedro.framework.project import configure_project
    from kedro.framework.session.session import _register_all_project_hooks

    hook_manager = get_hook_manager()
    configure_project(package_name)
    _register_all_project_hooks(hook_manager)
    logging.config.dictConfig(conf_logging)
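A hedged sketch of how a bootstrap helper like this could serve as a worker-pool initializer, so each subprocess re-applies the project configuration before doing any work (the package name and logging config are illustrative; Kedro's ParallelRunner performs its own internal equivalent):

from concurrent.futures import ProcessPoolExecutor

conf_logging = {"version": 1, "disable_existing_loggers": False}

with ProcessPoolExecutor(
    max_workers=2,
    initializer=_bootstrap_subprocess,
    initargs=("my_package", conf_logging),  # hypothetical package name
) as pool:
    pass  # submit node execution tasks here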
Example #17
def mock_session_with_hooks(tmp_path, mock_settings_with_logging_hooks,
                            logging_hooks, mocker):  # pylint: disable=unused-argument
    mocker.patch("kedro.framework.project._validate_module")
    logging_hooks.queue_listener.start()
    configure_project(MOCK_PACKAGE_NAME)
    yield KedroSession.create(MOCK_PACKAGE_NAME,
                              tmp_path,
                              extra_params={"params:key": "value"})
    logging_hooks.queue_listener.stop()
Example #18
def test_node_hook_logging_above_limit_tag_strategy(kedro_project,
                                                    dummy_run_params,
                                                    param_length):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="tag")), ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )

            # IMPORTANT: exceeding the parameter length limit should raise
            # an error for every mlflow backend, but it does not on the
            # FileStore backend:
            # https://github.com/mlflow/mlflow/issues/2814#issuecomment-628284425
            # Since the tests use the FileStore backend for simplicity, logging
            # works here, but the hook enforces a failure itself (which differs
            # slightly from mlflow's behaviour).
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {}
        assert {
            k: v
            for k, v in current_run.data.tags.items()
            if not k.startswith("mlflow")
        } == {
            "my_param": param_value
        }
Example #19
def dummy_context(tmp_path, prepare_project_dir, env, extra_params, mocker):  # pylint: disable=unused-argument
    mocker.patch("kedro.framework.project._validate_module")
    configure_project(MOCK_PACKAGE_NAME)
    context = KedroContext(MOCK_PACKAGE_NAME,
                           str(tmp_path),
                           env=env,
                           extra_params=extra_params)

    return context
Example #20
def _load_project(project_path):  # pragma: no cover
    # TODO: This one can potentially become project bootstrap and will be
    #  tested there
    if not _is_project(project_path):
        return None
    metadata = _get_project_metadata(project_path)
    _add_src_to_path(metadata.source_dir, project_path)
    configure_project(metadata.package_name)
    return metadata
Example #21
def test_get_mlflow_config_in_uninitialized_project(kedro_project):
    # kedro_project is a pytest fixture in conftest
    with pytest.raises(
            KedroMlflowConfigError,
            match="No 'mlflow.yml' config file found in environment"):
        project_metadata = _get_project_metadata(kedro_project)
        _add_src_to_path(project_metadata.source_dir, kedro_project)
        configure_project(project_metadata.package_name)
        with KedroSession.create(project_metadata.package_name, kedro_project):
            get_mlflow_config()
Example #22
def dummy_context(
    tmp_path, prepare_project_dir, env, extra_params, mocker
):  # pylint: disable=unused-argument
    configure_project(MOCK_PACKAGE_NAME)
    context = KedroContext(
        MOCK_PACKAGE_NAME, str(tmp_path), env=env, extra_params=extra_params
    )

    yield context
    pipelines._clear(MOCK_PACKAGE_NAME)
Example #23
def test_configure_project_should_not_raise_for_unimportable_pipelines(
    mock_package_name_with_unimportable_pipelines_file,
):
    # configure_project should not raise error for unimportable pipelines
    # since pipelines loading is lazy
    configure_project(mock_package_name_with_unimportable_pipelines_file)

    # accessing data should raise for unimportable pipelines
    with pytest.raises(ModuleNotFoundError,
                       match="No module named 'this_is_not_a_real_thing'"):
        _ = pipelines["new_pipeline"]
Example #24
def get_session():
    "Get kedro session"
    cur_path = os.getcwd()
    os.chdir(Path(__file__).parents[2])
    configure_project("kedro_171_package")
    session = KedroSession.create(Path(__file__).resolve().parent.name)
    _activate_session(session, force=True)
    os.chdir(cur_path)

    return session
Example #25
def test_mlflow_pipeline_hook_with_pipeline_ml_signature(
    kedro_project_with_mlflow_conf,
    env_from_dict,
    dummy_pipeline,
    dummy_catalog,
    dummy_run_params,
    model_signature,
    expected_signature,
):
    # kedro_project_with_mlflow_conf is a conftest fixture
    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):
        pipeline_hook = MlflowPipelineHook()
        runner = SequentialRunner()

        pipeline_to_run = pipeline_ml_factory(
            training=dummy_pipeline.only_nodes_with_tags("training"),
            inference=dummy_pipeline.only_nodes_with_tags("inference"),
            input_name="raw_data",
            conda_env=env_from_dict,
            model_name="model",
            model_signature=model_signature,
        )

        pipeline_hook.after_catalog_created(
            catalog=dummy_catalog,
            # `after_catalog_created` does not use any of the arguments below,
            # so we set them to empty values.
            conf_catalog={},
            conf_creds={},
            feed_dict={},
            save_version="",
            load_versions="",
            run_id=dummy_run_params["run_id"],
        )
        pipeline_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )
        runner.run(pipeline_to_run, dummy_catalog)
        run_id = mlflow.active_run().info.run_id
        pipeline_hook.after_pipeline_run(
            run_params=dummy_run_params, pipeline=pipeline_to_run, catalog=dummy_catalog
        )

        # test : parameters should have been logged
        trained_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
        assert trained_model.metadata.signature == expected_signature
Example #26
def init(env, force, silent):
    """Updates the template of a kedro project.
    Running this command is mandatory to use kedro-mlflow.
    This adds "conf/base/mlflow.yml": This is a configuration file
    used for run parametrization when calling "kedro run" command.
    See INSERT_DOC_URL for further details.
    """

    # get constants
    mlflow_yml = "mlflow.yml"
    project_path = Path().cwd()
    project_metadata = _get_project_metadata(project_path)
    _add_src_to_path(project_metadata.source_dir, project_path)
    configure_project(project_metadata.package_name)
    session = KedroSession.create(
        project_metadata.package_name, project_path=project_path
    )
    context = session.load_context()
    mlflow_yml_path = project_path / context.CONF_ROOT / env / mlflow_yml

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    if mlflow_yml_path.is_file() and not force:
        click.secho(
            click.style(
                f"A 'mlflow.yml' already exists at '{mlflow_yml_path}' You can use the ``--force`` option to override it.",
                fg="red",
            )
        )
    else:
        try:
            write_jinja_template(
                src=TEMPLATE_FOLDER_PATH / mlflow_yml,
                is_cookiecutter=False,
                dst=mlflow_yml_path,
                python_package=project_metadata.package_name,
            )
        except FileNotFoundError:
            click.secho(
                click.style(
                    f"No env '{env}' found. Please check that this folder exists inside the '{context.CONF_ROOT}' folder.",
                    fg="red",
                )
            )
            # do not report success when the template could not be written
            return
        if not silent:
            click.secho(
                click.style(
                    f"'{context.CONF_ROOT}/{env}/{mlflow_yml}' successfully updated.",
                    fg="green",
                )
            )
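For reference, the mlflow.yml this command materializes has the same shape that Example #5 builds programmatically; a hedged sketch of equivalent content, expressed as the Python dict those tests pass to _write_yaml (values illustrative, not guaranteed defaults):

mlflow_yml_content = dict(
    mlflow_tracking_uri="mlruns",
    credentials=None,
    disable_tracking=dict(pipelines=[]),
    experiment=dict(name="my_project", create=True),  # placeholder name
    run=dict(id=None, name=None, nested=True),
    ui=dict(port="5000", host="localhost"),
    hooks=dict(
        node=dict(
            flatten_dict_params=False,
            recursive=True,
            sep=".",
            long_parameters_strategy="fail",
        )
    ),
)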
Example #27
def mock_session(
    mocker, mock_settings_with_mlflow_hooks, kedro_project_path
):  # pylint: disable=unused-argument

    # we need to patch "kedro.framework.session.session.validate_settings"
    # instead of "kedro.framework.project.validate_settings" because that is
    # where it is imported
    mocker.patch("kedro.framework.session.session.validate_settings")
    # likewise, we patch "kedro.framework.session.session._register_hooks_setuptools"
    # instead of "kedro.framework.hooks.manager._register_hooks_setuptools"
    # because it is imported there
    mocker.patch(
        "kedro.framework.session.session._register_hooks_setuptools"
    )  # prevent registering the hooks of plugins which are already installed
    configure_project(MOCK_PACKAGE_NAME)
    return KedroSession.create(MOCK_PACKAGE_NAME, kedro_project_path)
Example #28
    def test_register_pipelines_with_duplicate_entries(self, tmp_path,
                                                       mock_pipelines, mocker):
        mocker.patch("kedro.framework.project._validate_module")
        pattern = ("Found duplicate pipeline entries. The following "
                   "will be overwritten: __default__")
        with pytest.warns(UserWarning, match=re.escape(pattern)):
            configure_project(MOCK_PACKAGE_NAME)

        session = KedroSession.create(MOCK_PACKAGE_NAME, tmp_path)
        context = session.load_context()
        # check that all pipeline dictionaries merged together correctly
        expected_pipelines = {
            key: CONTEXT_PIPELINE
            for key in ("__default__", "de", "pipe")
        }
        assert mock_pipelines == expected_pipelines
        assert context.pipelines == expected_pipelines
Example #29
def test_on_pipeline_error(kedro_project_with_mlflow_conf):

    tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        package_name=project_metadata.package_name,
        project_path=kedro_project_with_mlflow_conf,
    ):

        def failing_node():
            mlflow.start_run(nested=True)
            raise ValueError("Let's make this pipeline fail")

        class DummyContextWithHook(KedroContext):
            project_name = "fake project"
            package_name = "fake_project"
            project_version = "0.16.5"

            hooks = (MlflowPipelineHook(),)

            def _get_pipeline(self, name: str = None) -> Pipeline:
                return Pipeline(
                    [
                        node(
                            func=failing_node,
                            inputs=None,
                            outputs="fake_output",
                        )
                    ]
                )

        with pytest.raises(ValueError):
            failing_context = DummyContextWithHook(
                "fake_package", kedro_project_with_mlflow_conf.as_posix()
            )
            failing_context.run()

        # the run we want is the latest one in the Default experiment
        failing_run_info = MlflowClient(tracking_uri).list_run_infos("0")[0]
        assert mlflow.active_run() is None  # the run must have been closed
        assert failing_run_info.status == RunStatus.to_string(
            RunStatus.FAILED
        )  # it must be marked as failed
Example #30
def test_node_hook_logging_above_limit_truncate_strategy(
        kedro_project, dummy_run_params, param_length):

    _write_yaml(
        kedro_project / "conf" / "local" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="truncate")), ),
    )

    mlflow_tracking_uri = (kedro_project / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    project_metadata = _get_project_metadata(kedro_project)
    _add_src_to_path(project_metadata.source_dir, kedro_project)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
            package_name=project_metadata.package_name,
            project_path=kedro_project,
    ):
        with mlflow.start_run():
            mlflow_node_hook.before_pipeline_run(
                run_params=dummy_run_params,
                pipeline=Pipeline([]),
                catalog=DataCatalog(),
            )
            mlflow_node_hook.before_node_run(
                node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
                catalog=DataCatalog(),  # can be empty
                inputs=node_inputs,
                is_async=False,
                run_id="132",
            )
            run_id = mlflow.active_run().info.run_id

        mlflow_client = MlflowClient(mlflow_tracking_uri)
        current_run = mlflow_client.get_run(run_id)
        assert current_run.data.params == {
            "my_param": param_value[0:MAX_PARAM_VAL_LENGTH]
        }
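For reference, MAX_PARAM_VAL_LENGTH in the final assertion comes from mlflow's validation utilities (assuming an mlflow 1.x layout, where the limit was 250 characters). A minimal sketch of the truncation the "truncate" strategy applies:

from mlflow.utils.validation import MAX_PARAM_VAL_LENGTH

param_value = 1000 * "a"
# keep only what the backend will accept, mirroring the hook's behaviour
truncated = param_value[0:MAX_PARAM_VAL_LENGTH]
assert len(truncated) == MAX_PARAM_VAL_LENGTH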