def test_invalid_path(self, tmp_path):
    """Test for loading context from an invalid path."""
    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = r"Could not find '\.kedro\.yml'"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(other_path))

def test_kedro_yml_invalid_format(self, fake_repo_path):
    """Test for loading context when `.kedro.yml` is not valid YAML."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text("!!")  # Invalid YAML
    pattern = r"Failed to parse '\.kedro\.yml' file"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))

def test_invalid_path(self, tmp_path):
    """Test for loading context from an invalid path."""
    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = "Could not find the project configuration file 'pyproject.toml'"
    with pytest.raises(RuntimeError, match=re.escape(pattern)):
        load_context(str(other_path))

def test_kedro_yml_has_no_context_path(self, fake_repo_path):
    """Test for loading context when `.kedro.yml` is missing the required
    `context_path` field.
    """
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text("fake_key: fake_value\nsource_dir: src\n")
    pattern = r"'\.kedro\.yml' doesn't have a required `context_path` field"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))

def test_kedro_yml_has_no_context_path(self, fake_repo_path):
    """Test for loading context when `.kedro.yml` is missing the required
    `context_path` field.
    """
    payload = {"fake_key": "fake_value", "project_version": kedro_version}
    _create_kedro_config(fake_repo_path, payload)
    pattern = "'.kedro.yml' doesn't have a required `context_path` field"
    with pytest.raises(KedroContextError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

def test_invalid_path(self, tmp_path):
    """Test for loading context from an invalid path."""
    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = (
        "Could not find any of configuration files '.kedro.yml, pyproject.toml'"
    )
    with pytest.raises(KedroContextError, match=re.escape(pattern)):
        load_context(str(other_path))

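# Hedged usage sketch (not part of the snippets above): the tests in this collection
# exercise `load_context`, which in Kedro <0.17 is called with the project root plus an
# optional config environment and extra parameters. A minimal illustration, assuming it
# runs from inside a valid Kedro project:
from pathlib import Path

from kedro.framework.context import load_context


def _example_load_context():
    context = load_context(Path.cwd(), env="local", extra_params={"foo": "bar"})
    return context.catalog  # the project's DataCatalog is exposed on the context
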
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    try:
        import kedro.config.default_logger  # noqa
        from kedro.framework.cli.jupyter import collect_line_magic
        from kedro.framework.context import load_context
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path

        # remove cached user modules
        context = load_context(path)
        to_remove = [
            mod for mod in sys.modules if mod.startswith(context.package_name)
        ]
        # `del` is used instead of `reload()` because: If the new version of a module does not
        # define a name that was defined by the old version, the old definition remains.
        for module in to_remove:
            del sys.modules[module]

        # clear hook manager; hook implementations will be re-registered when the
        # context is instantiated again in `load_context()` below
        hook_manager = get_hook_manager()
        name_plugin_pairs = hook_manager.list_name_plugin()
        for name, plugin in name_plugin_pairs:
            hook_manager.unregister(name=name, plugin=plugin)

        logging.debug("Loading the context from %s", str(path))
        # Reload context to fix `pickle` related error (it is unable to serialize reloaded objects)
        # Some details can be found here:
        # https://modwsgi.readthedocs.io/en/develop/user-guides/issues-with-pickle-module.html#packing-and-script-reloading
        context = load_context(path)
        catalog = context.catalog

        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(needs_local_scope(line_magic))
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception(
            "Kedro's ipython session startup script failed:\n%s", str(err)
        )
        raise err

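# Hedged usage note (assumption, not shown in this collection): Kedro's IPython startup
# script typically exposes the helper above as a line magic, so a session started from a
# project can refresh its globals in place:
#
#     %reload_kedro                    # reload the current project
#     %reload_kedro /path/to/project   # point the session at another project
#
# after which the `context` and `catalog` globals are redefined.
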
def test_pyproject_toml_has_missing_mandatory_keys(self, fake_repo_path):
    payload = {
        "tool": {
            "kedro": {"fake_key": "fake_value", "project_version": kedro_version}
        }
    }
    _create_kedro_config(fake_repo_path, payload)
    pattern = (
        "Missing required keys ['package_name', 'project_name'] "
        "from 'pyproject.toml'."
    )
    with pytest.raises(RuntimeError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

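# Hedged sketch (assumption): the `_create_kedro_config` test helper used throughout
# these tests is not shown here. Its call sites suggest it serialises a payload into
# either `.kedro.yml` or `pyproject.toml`; the exact behaviour differs across the Kedro
# versions these snippets come from, so this reconstruction picks the target file from
# the payload shape.
import toml
import yaml


def _create_kedro_config(project_path, payload, yml=True):
    if yml and "tool" not in payload:
        kedro_conf = project_path / ".kedro.yml"
        kedro_conf.write_text(yaml.dump(payload))
    else:
        kedro_conf = project_path / "pyproject.toml"
        kedro_conf.write_text(toml.dumps(payload))
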
def test_source_path_does_not_exist(self, fake_repo_path, fake_package_name):
    """Test for a valid source_dir pattern, but it does not exist."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    source_dir = "non_existent"
    kedro_yml_path.write_text(
        f"context_path: {fake_package_name}.run.ProjectContext\nsource_dir: {source_dir}\n"
    )
    non_existent_path = (fake_repo_path / source_dir).expanduser().resolve()
    pattern = r"Source path '{}' cannot be found".format(non_existent_path)
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))

def test_settings_py_has_context_path(
    self, fake_repo_path, fake_package_name, mocker
):
    """Test for loading custom `ProjectContext` context."""
    payload = {
        "tool": {
            "kedro": {
                "package_name": fake_package_name,
                "project_version": kedro_version,
                "project_name": "fake_project",
            }
        }
    }
    _create_kedro_config(fake_repo_path, payload)

    settings_mock = mocker.patch(
        "kedro.framework.context.context._get_project_settings",
        side_effect=(MyContext, (), (), "conf"),
    )

    context = load_context(str(fake_repo_path))
    assert isinstance(context, KedroContext)
    assert context.__class__ is not KedroContext
    assert context.__class__.__name__ == "MyContext"
    settings_mock.assert_called_once_with(
        fake_package_name, "CONTEXT_CLASS", KedroContext
    )

def delete_pipeline(name, env, yes):
    """Delete a modular pipeline by providing the pipeline name as an argument."""
    try:
        context = load_context(Path.cwd(), env=env)
    except Exception as err:  # pylint: disable=broad-except
        _handle_exception(
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}"
        )

    package_dir = _get_project_package_dir(context)

    env = env or "base"
    pipeline_artifacts = _get_pipeline_artifacts(context, pipeline_name=name, env=env)
    dirs = [path for path in pipeline_artifacts if path.is_dir()]

    if not yes:
        click.echo(
            "The following directories and everything within them will be removed:\n"
        )
        click.echo(indent("\n".join(str(dir_) for dir_ in dirs), " " * 2))
        click.echo()
        yes = click.confirm(f"Are you sure you want to delete pipeline `{name}`?")
        click.echo()

    if not yes:
        raise KedroCliError("Deletion aborted!")

    _delete_dirs(*dirs)
    click.secho(f"\nPipeline `{name}` was successfully deleted.\n", fg="green")
    click.secho(
        f"If you added the pipeline `{name}` to `create_pipelines()` in "
        f"`{package_dir / 'pipeline.py'}`, you will need to remove it.",
        fg="yellow",
    )

def init(force, silent):
    """Updates the template of a kedro project.

    Running this command is mandatory to use kedro-mlflow. Two actions are performed:

    1. Add "conf/base/mlflow.yml": a configuration file used for run parametrization
       when calling the "kedro run" command. See INSERT_DOC_URL for further details.
    2. Modify "src/YOUR_PACKAGE_NAME/run.py" to add mlflow hooks to the
       ProjectContext. This erases your current "run.py" script and all your
       modifications will be lost. If you do not want to erase "run.py", insert the
       hooks manually.
    """

    # get constants
    project_path = Path().cwd()
    project_globals = get_static_project_data(project_path)
    context = load_context(project_path)
    conf_root = context.CONF_ROOT

    # mlflow.yml is just a static file,
    # but the name of the experiment is set to be the same as the project
    mlflow_yml = "mlflow.yml"
    write_jinja_template(
        src=TEMPLATE_FOLDER_PATH / mlflow_yml,
        is_cookiecutter=False,
        dst=project_path / conf_root / "base" / mlflow_yml,
        python_package=project_globals["package_name"],
    )
    if not silent:
        click.secho(
            click.style(
                f"'{conf_root}/base/mlflow.yml' successfully updated.", fg="green"
            )
        )

def before_pipeline_run(
    self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
) -> None:
    """Hook to be invoked before a pipeline runs.

    Args:
        run_params: The params needed for the given run.
            Should be identical to the data logged by Journal.
            # @fixme: this needs to be modelled explicitly as code,
            # instead of comment
            Schema: {
                "run_id": str,
                "project_path": str,
                "env": str,
                "kedro_version": str,
                "tags": Optional[List[str]],
                "from_nodes": Optional[List[str]],
                "to_nodes": Optional[List[str]],
                "node_names": Optional[List[str]],
                "from_inputs": Optional[List[str]],
                "load_versions": Optional[List[str]],
                "pipeline_name": str,
                "extra_params": Optional[Dict[str, Any]],
            }
        pipeline: The ``Pipeline`` that will be run.
        catalog: The ``DataCatalog`` to be used during the run.
    """
    self.context = load_context(
        project_path=run_params["project_path"],
        env=run_params["env"],
        extra_params=run_params["extra_params"],
    )
    config = get_mlflow_config(self.context)
    self.flatten = config.node_hook_opts["flatten_dict_params"]
    self.recursive = config.node_hook_opts["recursive"]
    self.sep = config.node_hook_opts["sep"]

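# Hedged sketch (assumption): the `node_hook_opts` read above configure how nested
# parameter dictionaries are flattened before being logged to mlflow. The flattening
# helper itself is not included in this collection; a minimal version consistent with
# the `flatten_dict_params`, `recursive` and `sep` options might look like this:
def _flatten_dict(d, recursive=True, sep="."):
    flat = {}
    for key, value in d.items():
        if isinstance(value, dict) and recursive:
            for sub_key, sub_value in _flatten_dict(value, recursive, sep).items():
                flat[f"{key}{sep}{sub_key}"] = sub_value
        else:
            flat[key] = value
    return flat


# e.g. _flatten_dict({"model": {"max_depth": 3}}) -> {"model.max_depth": 3}
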
def before_pipeline_run(
    self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
) -> None:
    """Hook to be invoked before a pipeline runs.

    Args:
        run_params: The params needed for the given run.
            Should be identical to the data logged by Journal.
            # @fixme: this needs to be modelled explicitly as code,
            # instead of comment
            Schema: {
                "run_id": str,
                "project_path": str,
                "env": str,
                "kedro_version": str,
                "tags": Optional[List[str]],
                "from_nodes": Optional[List[str]],
                "to_nodes": Optional[List[str]],
                "node_names": Optional[List[str]],
                "from_inputs": Optional[List[str]],
                "load_versions": Optional[List[str]],
                "pipeline_name": str,
                "extra_params": Optional[Dict[str, Any]],
            }
        pipeline: The ``Pipeline`` that will be run.
        catalog: The ``DataCatalog`` to be used during the run.
    """
    self.context = load_context(
        project_path=run_params["project_path"],
        env=run_params["env"],
        extra_params=run_params["extra_params"],
    )
    mlflow_conf = get_mlflow_config(self.context)
    mlflow_conf.setup(self.context)

    run_name = (
        mlflow_conf.run_opts["name"]
        if mlflow_conf.run_opts["name"] is not None
        else run_params["pipeline_name"]
    )
    mlflow.start_run(
        run_id=mlflow_conf.run_opts["id"],
        experiment_id=mlflow_conf.experiment.experiment_id,
        run_name=run_name,
        nested=mlflow_conf.run_opts["nested"],
    )
    # Set tags only for run parameters that have values.
    mlflow.set_tags({k: v for k, v in run_params.items() if v})
    # manually add the git sha for consistency with the journal
    # TODO: this does not take uncommitted files into account, so it
    # does not ensure reproducibility. Define what to do.
    mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
    mlflow.set_tag(
        "kedro_command",
        _generate_kedro_command(
            tags=run_params["tags"],
            node_names=run_params["node_names"],
            from_nodes=run_params["from_nodes"],
            to_nodes=run_params["to_nodes"],
            from_inputs=run_params["from_inputs"],
            load_versions=run_params["load_versions"],
            pipeline_name=run_params["pipeline_name"],
        ),
    )

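# Hedged sketch (assumption): `_git_sha` used above is not included in this collection.
# An implementation consistent with the "git_sha" tag it sets could shell out to git
# and return None when the project is not a git repository:
import subprocess


def _git_sha(project_path):
    try:
        res = subprocess.check_output(
            ["git", "rev-parse", "--short", "HEAD"], cwd=str(project_path)
        )
        return res.decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return None
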
def test_catalog_and_params(self, dummy_project, fake_kedro_cli):
    """Test that catalog and parameter configs generated in pipeline sections
    propagate into the context"""
    pipelines_dir = dummy_project / "src" / PACKAGE_NAME / "pipelines"
    assert pipelines_dir.is_dir()

    cmd = ["pipeline", "create", PIPELINE_NAME]
    result = CliRunner().invoke(fake_kedro_cli.cli, cmd)
    assert result.exit_code == 0

    # write pipeline catalog
    pipe_conf_dir = dummy_project / "conf" / "base" / "pipelines" / PIPELINE_NAME
    catalog_dict = {
        "ds_from_pipeline": {
            "type": "pandas.CSVDataSet",
            "filepath": "data/01_raw/iris.csv",
        }
    }
    with (pipe_conf_dir / "catalog.yml").open("w") as f:
        yaml.dump(catalog_dict, f)

    # write pipeline parameters
    params_dict = {"params_from_pipeline": {"p1": [1, 2, 3], "p2": None}}
    with (pipe_conf_dir / "parameters.yml").open("w") as f:
        yaml.dump(params_dict, f)

    ctx = load_context(Path.cwd())
    assert isinstance(ctx.catalog._data_sets["ds_from_pipeline"], CSVDataSet)
    assert isinstance(ctx.catalog.load("ds_from_pipeline"), DataFrame)
    assert ctx.params["params_from_pipeline"] == params_dict["params_from_pipeline"]

def _install_files(
    package_name: str, source_path: Path, env: str = None, alias: str = None
):
    env = env or "base"
    context = load_context(Path.cwd(), env=env)

    package_source, test_source, conf_source = _get_package_artifacts(
        source_path, package_name
    )

    pipeline_name = alias or package_name
    package_dest, test_dest, conf_dest = _get_pipeline_artifacts(
        context, pipeline_name=pipeline_name, env=env
    )

    if conf_source.is_dir():
        _sync_dirs(conf_source, conf_dest)
        # `config` was packaged under `package_name` directory with `kedro pipeline package`.
        # Since `config` was already synced, we don't want to send it again
        # when syncing the package, so we remove it.
        shutil.rmtree(str(conf_source))

    if test_source.is_dir():
        _sync_dirs(test_source, test_dest)

    # Sync everything under package directory, except `config` since we already sent it.
    if package_source.is_dir():
        _sync_dirs(package_source, package_dest)

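# Hedged sketch (assumption): `_sync_dirs` called above is not part of this collection.
# A minimal version consistent with its call sites copies the source tree into the
# target directory without clobbering files that already exist in the project:
import shutil
from pathlib import Path


def _sync_dirs(source: Path, target: Path) -> None:
    target.mkdir(parents=True, exist_ok=True)
    for entry in source.rglob("*"):
        destination = target / entry.relative_to(source)
        if entry.is_dir():
            destination.mkdir(parents=True, exist_ok=True)
        elif not destination.exists():  # keep existing project files untouched
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(str(entry), str(destination))
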
def test_valid_context_with_env(self, mocker, monkeypatch, fake_repo_path):
    """Test getting project context when Kedro config environment is
    specified in the environment variable.
    """
    mocker.patch("kedro.config.config.ConfigLoader.get")
    monkeypatch.setenv("KEDRO_ENV", "my_fake_env")
    result = load_context(str(fake_repo_path))
    assert result.env == "my_fake_env"

def test_kedro_yml_invalid_source_dir_pattern(
    self, fake_repo_path, source_dir, fake_package_name
):
    """Test for invalid pattern for source_dir that is not relative to the project path."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text(
        f"context_path: {fake_package_name}.run.ProjectContext\nsource_dir: {source_dir}\n"
    )
    source_path = (fake_repo_path / Path(source_dir).expanduser()).resolve()
    pattern = re.escape(
        f"Source path '{source_path}' has to be relative to your project root "
        f"'{fake_repo_path.resolve()}'"
    )
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))

def test_pyproject_toml_has_no_context_path(self, fake_repo_path):
    """Test for loading context when `pyproject.toml` is missing the required
    `context_path` field.
    """
    payload = {
        "tool": {
            "kedro": {"fake_key": "fake_value", "project_version": kedro_version}
        }
    }
    _create_kedro_config(fake_repo_path, payload, yml=False)
    (fake_repo_path / ".kedro.yml").unlink()

    pattern = "'pyproject.toml' doesn't have a required `context_path` field"
    with pytest.raises(KedroContextError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

def get_project_context(key: str = "context", **kwargs) -> Any: """Gets the context value from context associated with the key. Args: key: Optional key to get associated value from Kedro context. Supported keys are "verbose" and "context", and it defaults to "context". kwargs: Optional custom arguments defined by users, which will be passed into the constructor of the projects KedroContext subclass. Returns: Requested value from Kedro context dictionary or the default if the key was not found. Raises: KedroCliError: When the key is not found and the default value was not specified. """ def _deprecation_msg(key): msg_dict = { "get_config": ["config_loader", "ConfigLoader"], "create_catalog": ["catalog", "DataCatalog"], "create_pipeline": ["pipeline", "Pipeline"], "template_version": ["project_version", None], "project_name": ["project_name", None], "project_path": ["project_path", None], } attr, obj_name = msg_dict[key] msg = '`get_project_context("{}")` is now deprecated. '.format(key) if obj_name: msg += ( "This is still returning a function that returns `{}` " "instance, however passed arguments have no effect anymore " "since Kedro 0.15.0. ".format(obj_name)) msg += ( "Please get `KedroContext` instance by calling `get_project_context()` " "and use its `{}` attribute.".format(attr)) return msg context = load_context(Path.cwd(), **kwargs) # Dictionary to be compatible with existing Plugins. Future plugins should # retrieve necessary Kedro project properties from context value = { "context": context, "get_config": lambda project_path, env=None, **kw: context.config_loader, "create_catalog": lambda config, **kw: context.catalog, "create_pipeline": lambda **kw: context.pipeline, "template_version": context.project_version, "project_name": context.project_name, "project_path": context.project_path, "verbose": _VERBOSE, }[key] if key not in ("verbose", "context"): warnings.warn(_deprecation_msg(key), DeprecationWarning) return deepcopy(value)
def _call_viz(
    host=None,
    port=None,
    browser=None,
    load_file=None,
    save_file=None,
    pipeline_name=None,
    env=None,
    project_path=None,
):
    global _DATA  # pylint: disable=global-statement,invalid-name
    global _CATALOG  # pylint: disable=global-statement

    if load_file:
        # Remove all handlers for root logger
        root_logger = logging.getLogger()
        root_logger.handlers = []

        _DATA = _load_from_file(load_file)
    else:
        try:
            project_path = project_path or Path.cwd()

            if KEDRO_VERSION.match(">=0.17.0"):  # pragma: no cover
                from kedro.framework.session import KedroSession
                from kedro.framework.startup import (  # pylint: disable=no-name-in-module,import-error
                    _get_project_metadata,
                )

                package_name = _get_project_metadata(project_path).package_name
                session_kwargs = dict(
                    package_name=package_name,
                    project_path=project_path,
                    env=env,
                    save_on_close=False,
                )
                session = KedroSession.create(  # pylint: disable=unexpected-keyword-arg
                    **session_kwargs
                )
                context = session.load_context()  # pylint: disable=no-member
                pipelines = _get_pipelines_from_context(context, pipeline_name)
            else:  # pragma: no cover
                context = load_context(project_path=project_path, env=env)
                pipelines = _get_pipelines_from_context(context, pipeline_name)
        except KedroContextError:
            raise KedroCliError(ERROR_PROJECT_ROOT)  # pragma: no cover

        _CATALOG = context.catalog
        _DATA = format_pipelines_data(pipelines)

    if save_file:
        Path(save_file).write_text(json.dumps(_DATA, indent=4, sort_keys=True))
    else:
        is_localhost = host in ("127.0.0.1", "localhost", "0.0.0.0")
        if browser and is_localhost:
            webbrowser.open_new("http://{}:{:d}/".format(host, port))
        app.run(host=host, port=port)

def test_source_path_does_not_exist(self, fake_repo_path, fake_package_name):
    """Test for a valid source_dir pattern, but it does not exist."""
    source_dir = "non_existent"
    payload = {
        "context_path": f"{fake_package_name}.run.ProjectContext",
        "source_dir": source_dir,
        "project_version": kedro_version,
        "project_name": "Test Project",
    }
    _create_kedro_config(fake_repo_path, payload)
    non_existent_path = (fake_repo_path / source_dir).expanduser().resolve()

    pattern = f"Source path '{non_existent_path}' cannot be found"
    with pytest.raises(KedroContextError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

def _load_project_context(**kwargs):
    """Returns project context."""
    try:
        return load_context(Path.cwd(), **kwargs)
    except Exception as err:  # pylint: disable=broad-except
        env = kwargs.get("env")
        _handle_exception(
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}"
        )

def test_valid_context(self, fake_repo_path, mocker):
    """Test getting project context."""
    get_project_metadata_mock = mocker.patch(
        "kedro.framework.context.context._get_project_metadata",
        wraps=_get_project_metadata,
    )
    result = load_context(str(fake_repo_path))
    assert result.package_name == "fake_package"
    assert str(fake_repo_path.resolve() / "src") in sys.path
    get_project_metadata_mock.assert_called_with(fake_repo_path)

def test_get_mlflow_config_in_uninitialized_project(mocker, tmp_path, config_dir):
    # config_with_base_mlflow_conf is a pytest.fixture in conftest
    mocker.patch("logging.config.dictConfig")
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    context = load_context(tmp_path)
    with pytest.raises(
        KedroMlflowConfigError,
        match="No 'mlflow.yml' config file found in environment",
    ):
        get_mlflow_config(context)

def test_kedro_yml_invalid_source_dir_pattern(
    self, fake_repo_path, source_dir, fake_package_name
):
    """Test for invalid pattern for source_dir that is not relative to the project path."""
    payload = {
        "context_path": f"{fake_package_name}.run.ProjectContext",
        "source_dir": source_dir,
        "project_version": kedro_version,
        "project_name": "Test Project",
    }
    _create_kedro_config(fake_repo_path, payload)

    source_path = (fake_repo_path / Path(source_dir).expanduser()).resolve()
    pattern = (
        f"Source path '{source_path}' has to be relative to your project root "
        f"'{fake_repo_path.resolve()}'"
    )
    with pytest.raises(KedroContextError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

def suite_new(directory, empty, replace, batch_kwargs):
    """
    Create Great Expectations Suites based on the kedro catalog using the
    BasicSuiteBuilderProfiler.

    If you wish to create suites without using the BasicSuiteBuilderProfiler,
    add the `--empty` flag.
    """
    kedro_context = load_context(Path.cwd())
    ge_context = toolkit.load_data_context_with_error_handling(directory)
    generate_basic_suites(kedro_context, ge_context, empty, replace, batch_kwargs)

def test_pyproject_toml_has_extra_keys(self, fake_repo_path, fake_package_name):
    project_name = "Test Project"
    payload = {
        "tool": {
            "kedro": {
                "project_version": kedro_version,
                "project_name": project_name,
                "package_name": fake_package_name,
                "unexpected_key": "hello",
            }
        }
    }
    _create_kedro_config(fake_repo_path, payload)
    pattern = (
        "Found unexpected keys in 'pyproject.toml'. Make sure it "
        "only contains the following keys: ['package_name', "
        "'project_name', 'project_version', 'source_dir']."
    )
    with pytest.raises(RuntimeError, match=re.escape(pattern)):
        load_context(str(fake_repo_path))

def init(target_directory, usage_stats):
    """
    Create a new Great Expectations project configuration and fill in the
    Datasources and Suites based on the kedro catalog.
    """
    from kedro.framework.context import load_context

    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)

    if not DataContext.does_config_exist_on_disk(ge_dir):
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            exit(0)
        try:
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(SETUP_SUCCESS)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            exit(5)

    if click.confirm("Generate Datasources based on Kedro Context?", default=True):
        kedro_context = load_context(Path.cwd())
        ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
        new_datasources = generate_datasources(kedro_context, ge_context)
        if new_datasources:
            cli_message(
                "Added {} new datasources to your project.".format(len(new_datasources))
            )

    if click.confirm(
        "Generate Basic Validation Suites based on Kedro Context?", default=True
    ):
        kedro_context = load_context(Path.cwd())
        ge_context = toolkit.load_data_context_with_error_handling(ge_dir)
        new_suites = generate_basic_suites(kedro_context, ge_context)
        if new_suites:
            cli_message(
                "Added {} new validation suites to your project.".format(len(new_suites))
            )

def package_pipeline(name, env, alias, destination):
    """Package up a pipeline for easy distribution. A .whl file will be created
    in the `<source_dir>/dist/` directory.
    """
    context = load_context(Path.cwd(), env=env)
    result_path = _package_pipeline(
        name, context, package_name=alias, destination=destination, env=env
    )

    as_alias = f" as `{alias}`" if alias else ""
    message = f"Pipeline `{name}` packaged{as_alias}! Location: {result_path}"
    click.secho(message, fg="green")