def test_invalid_path(self, tmp_path):
    """Test for loading context from an invalid path."""
    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = r"Could not find '\.kedro\.yml'"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(other_path))
def test_kedro_yml_has_no_context_path(self, fake_repo_path):
    """Test for loading context when `.kedro.yml` is missing the required
    `context_path` field.
    """
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text("fake_key: fake_value\nsource_dir: src\n")
    pattern = r"'\.kedro\.yml' doesn't have a required `context_path` field"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))
def test_kedro_yml_invalid_format(self, fake_repo_path):
    """Test for loading context when `.kedro.yml` contains invalid YAML."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text("!!")  # Invalid YAML
    pattern = r"Failed to parse '\.kedro\.yml' file"
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))
def test_source_path_does_not_exist(self, fake_repo_path):
    """Test for a valid source_dir pattern that points to a missing directory."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    source_dir = "non_existent"
    kedro_yml_path.write_text(
        "context_path: fake_package.run.ProjectContext\nsource_dir: {}\n".format(
            source_dir
        )
    )
    non_existent_path = (fake_repo_path / source_dir).expanduser().resolve()
    pattern = r"Source path '{}' cannot be found".format(non_existent_path)
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))
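# For reference, a minimal sketch of the well-formed `.kedro.yml` that the
# tests above mutate, written the same way the fixtures do. The helper name
# `write_valid_kedro_yml` is hypothetical; `fake_package.run.ProjectContext`
# is the tests' dummy context class.
from pathlib import Path

def write_valid_kedro_yml(repo_path: Path) -> None:
    """Hypothetical helper: write a `.kedro.yml` that `load_context` accepts."""
    (repo_path / ".kedro.yml").write_text(
        "context_path: fake_package.run.ProjectContext\n"  # dotted path to the KedroContext subclass
        "source_dir: src\n"  # source directory, relative to the project root
    )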
def test_load(fake_project, tmp_path):
    """Test getting project context."""
    result = load_context(str(fake_project))
    assert result == "fake"
    assert str(fake_project.resolve()) in sys.path
    assert os.getcwd() == str(fake_project.resolve())

    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = (
        "Cannot load context for `{}`, since another project "
        "`.*` has already been loaded".format(other_path.resolve())
    )
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(other_path))
def reload_kedro(project_path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    try:
        import kedro.config.default_logger  # noqa: F401  # sets up default logging on import
        from kedro.context import load_context

        context = load_context(project_path)
        catalog = context.catalog
        logging.info("** Kedro project {}".format(context.project_name))
        logging.info("Defined global variables `context` and `catalog`")
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise
    except Exception as err:
        startup_error = err
        logging.error("Kedro's ipython session startup script failed:\n%s", str(err))
        raise err
def test_kedro_mlflow_config_setup_tracking_priority(mocker, tmp_path, config_dir):
    """Test that the `mlflow_tracking_uri` set in mlflow.yml takes priority
    when one also exists in credentials.

    Args:
        mocker: pytest-mock fixture.
        tmp_path: pytest temporary-path fixture.
        config_dir: fixture setting up the project's conf directory.
    """
    # Pretend "tmp_path" is the root of a kedro project
    # (in lieu of creating a real ".kedro.yml" file)
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)
    (tmp_path / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2")))
    )
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert mlflow.get_tracking_uri() == (tmp_path / "mlruns1").as_uri()
def convert_notebook(all_flag, overwrite_flag, filepath):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `src/<package_name>/nodes`.

    *Note*: Make sure your notebooks have unique names!

    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted. Should not be provided
    if the --all flag is already present.
    """
    context = load_context(Path.cwd())

    if not filepath and not all_flag:
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    kedro_project_path = context.project_path
    kedro_package_name = "kedro_demo_feb2020"

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = kedro_project_path / "**" / "*.ipynb"
        notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]

    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        raise KedroCliError(
            "Found non-unique notebook names! "
            "Please rename the following: {}".format(", ".join(non_unique_names))
        )

    for notebook in notebooks:
        secho("Converting notebook '{}'...".format(str(notebook)))
        output_path = (
            kedro_project_path
            / "src"
            / kedro_package_name
            / "nodes"
            / "{}.py".format(notebook.stem)
        )
        if output_path.is_file():
            overwrite = overwrite_flag or click.confirm(
                "Output file {} already exists. Overwrite?".format(str(output_path)),
                default=False,
            )
            if overwrite:
                export_nodes(notebook, output_path)
        else:
            export_nodes(notebook, output_path)

    secho("Done!")
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global io
    try:
        import kedro.config.default_logger  # noqa: F401  # sets up default logging on import
        from kedro.context import load_context
        from kedro.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path
        logging.debug("Loading the context from %s", str(path))
        context = load_context(path)
        io = context.catalog
        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variables `context` and `io`")
        for line_magic in collect_line_magic():
            register_line_magic(line_magic)
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception(
            "Kedro's ipython session startup script failed:\n%s", str(err)
        )
        raise err
def test_kedro_yml_invalid_source_dir_pattern(self, fake_repo_path, source_dir):
    """Test for an invalid source_dir that is not relative to the project path."""
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text(
        f"context_path: fake_package.run.ProjectContext\nsource_dir: {source_dir}\n"
    )
    source_path = (fake_repo_path / Path(source_dir).expanduser()).resolve()
    pattern = re.escape(
        f"Source path '{source_path}' has to be relative to your project root "
        f"'{fake_repo_path.resolve()}'"
    )
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(fake_repo_path))
def run(
    tag,
    env,
    parallel,
    runner,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    if parallel and runner:
        raise KedroCliError(
            "The --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    if parallel:
        runner = "ParallelRunner"
    runner_class = load_obj(runner, "kedro.runner") if runner else SequentialRunner

    context = load_context(Path.cwd(), env=env, extra_params=params)
    context.run(
        tags=tag,
        runner=runner_class(),
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
        load_versions=load_version,
        pipeline_name=pipeline,
    )
def test_valid_context(self, fake_project):
    """Test getting project context."""
    result = load_context(str(fake_project))
    assert result.project_name == "fake"
    assert result.project_version == __version__
    assert str(fake_project.resolve()) in sys.path
    assert os.getcwd() == str(fake_project.resolve())
def get_project_context(key: Any, default: Any = NO_DEFAULT) -> Any:  # pragma: no cover
    """Get a value from the project context.

    The user is responsible for having the specified key in their project's
    context, which typically is exposed in the ``__kedro_context__`` function
    in ``run.py``.

    Args:
        key: Key in Kedro context dictionary.
        default: Default value if the key is not found. If not provided
            and the key is not found, this will raise a ``KedroCliError``.

    Returns:
        Requested value from Kedro context dictionary or the default if the key
            was not found.

    Raises:
        KedroCliError: When the key is not found and the default value was
            not specified.
    """
    _KEDRO_CONTEXT.update(load_context(Path.cwd()))
    try:
        value = _KEDRO_CONTEXT[key]
    except KeyError:
        if default is not NO_DEFAULT:
            return default
        _handle_exception(
            "`{}` not found in the context returned by "
            "__get_kedro_context__".format(key)
        )
    return deepcopy(value)
def test_valid_context_with_env(self, mocker, monkeypatch, fake_repo_path):
    """Test getting project context when the Kedro config environment is
    specified in the environment variable.
    """
    mocker.patch("kedro.config.config.ConfigLoader.get")
    monkeypatch.setenv("KEDRO_ENV", "my_fake_env")
    result = load_context(str(fake_repo_path))
    assert result.env == "my_fake_env"
def get_project_context(key: str = "context", **kwargs) -> Any:
    """Gets the context value from context associated with the key.

    Args:
        key: Optional key to get associated value from Kedro context.
            Supported keys are "verbose" and "context", and it defaults
            to "context".
        kwargs: Optional custom arguments defined by users, which will be
            passed into the constructor of the project's KedroContext subclass.

    Returns:
        Requested value from Kedro context dictionary or the default if the key
            was not found.

    Raises:
        KedroCliError: When the key is not found and the default value was
            not specified.
    """

    def _deprecation_msg(key):
        msg_dict = {
            "get_config": ["config_loader", "ConfigLoader"],
            "create_catalog": ["catalog", "DataCatalog"],
            "create_pipeline": ["pipeline", "Pipeline"],
            "template_version": ["project_version", None],
            "project_name": ["project_name", None],
            "project_path": ["project_path", None],
        }
        attr, obj_name = msg_dict[key]
        msg = '`get_project_context("{}")` is now deprecated. '.format(key)
        if obj_name:
            msg += (
                "This is still returning a function that returns `{}` "
                "instance, however passed arguments have no effect anymore "
                "since Kedro 0.15.0. ".format(obj_name)
            )
        msg += (
            "Please get `KedroContext` instance by calling `get_project_context()` "
            "and use its `{}` attribute.".format(attr)
        )
        return msg

    context = load_context(Path.cwd(), **kwargs)
    # Dictionary to be compatible with existing plugins. Future plugins should
    # retrieve necessary Kedro project properties from the context.
    value = {
        "context": context,
        "get_config": lambda project_path, env=None, **kw: context.config_loader,
        "create_catalog": lambda config, **kw: context.catalog,
        "create_pipeline": lambda **kw: context.pipeline,
        "template_version": context.project_version,
        "project_name": context.project_name,
        "project_path": context.project_path,
        "verbose": _VERBOSE,
    }[key]

    if key not in ("verbose", "context"):
        warnings.warn(_deprecation_msg(key), DeprecationWarning)

    return deepcopy(value)
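# A minimal usage sketch, assuming the call happens inside a Kedro project
# directory; the preferred pattern is to take the context object and read
# attributes off it rather than use the deprecated keys.
context = get_project_context()            # the KedroContext instance itself
catalog = context.catalog                  # preferred over get_project_context("create_catalog")
verbose = get_project_context("verbose")   # "verbose" is still supported, no deprecation warning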
def test_valid_context(self, mocker, fake_repo_path):
    """Test getting project context."""
    # Disable logging.config.dictConfig in KedroContext._setup_logging as
    # it changes logging.config and affects other unit tests
    mocker.patch("logging.config.dictConfig")
    result = load_context(str(fake_repo_path))
    assert result.project_name == "Test Project"
    assert result.project_version == kedro.__version__
    assert str(fake_repo_path.resolve() / "src") in sys.path
    assert os.getcwd() == str(fake_repo_path.resolve())
def test_kedro_yml_missing_source_dir(self, fake_repo_path):
    """If source_dir is missing (as it is by default), `src` is used to
    import the package, for backward compatibility.
    """
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text("context_path: fake_package.run.ProjectContext\n")
    result = load_context(str(fake_repo_path))
    assert result.project_name == "Test Project"
    assert result.project_version == kedro.__version__
    assert str(fake_repo_path.resolve() / "src") in sys.path
def test_kedro_mlflow_config_setup_export_credentials(mocker, tmp_path, config_dir):
    # Pretend "tmp_path" is the root of a kedro project
    # (in lieu of creating a real ".kedro.yml" file)
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)
    (tmp_path / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(fake_mlflow_cred="my_fake_cred")))
    )
    # The config must export the credentials as environment variables
    config = KedroMlflowConfig(project_path=tmp_path, credentials="my_mlflow_creds")
    context = load_context(tmp_path)
    config.setup(context)
    assert os.environ["fake_mlflow_cred"] == "my_fake_cred"
def test_kedro_mlflow_config_new_experiment_does_not_exists(
    mocker, tmp_path, config_dir
):
    # Pretend "tmp_path" is the root of a kedro project
    # (in lieu of creating a real ".kedro.yml" file)
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
async def predictor(model: str = "rf_model"):
    """API endpoint for predicting with a model.

    Outputs:
        row_index: int
        data: string
        predict: float
    """
    if model == "rf_model":
        context = load_context("")
        output = context.run(pipeline_name="predict_api")
        return output
def reload_kedro(line=None):
    """Line magic which reloads all Kedro default variables."""
    global proj_dir
    global proj_name
    global conf
    global io
    global startup_error
    try:
        import kedro.config.default_logger  # noqa: F401  # sets up default logging on import
        from kedro.context import load_context

        proj_name = "test-project"
        logging.info("** Kedro project {}".format(proj_name))
        project_context = load_context(proj_dir)
        conf = project_context["get_config"](proj_dir)
        io = project_context["create_catalog"](conf)
        logging.info("Defined global variables proj_dir, proj_name, conf and io")
    except ImportError:
        logging.error("Kedro appears not to be installed in your current environment.")
        raise
    except KeyError as err:
        startup_error = err
        if "create_catalog" in str(err):
            message = (
                "The function `create_catalog` is missing from "
                "test-project/src/test_project/run.py."
                "\nEither restore this function, or update "
                "test-project/.ipython/profile_default/startup/00-kedro-init.py."
            )
        elif "get_config" in str(err):
            message = (
                "The function `get_config` is missing from "
                "test-project/src/test_project/run.py."
                "\nEither restore this function, or update "
                "test-project/.ipython/profile_default/startup/00-kedro-init.py."
            )
        else:
            # guard against an unbound `message` for any other missing key
            message = str(err)
        logging.error(message)
        raise err
    except Exception as err:
        startup_error = err
        logging.error("Kedro's ipython session startup script failed:\n%s", str(err))
        raise err
def test_kedro_yml_valid_source_dir(
    self, mocker, monkeypatch, fake_repo_path, source_dir
):
    """Test for loading context from a valid source dir."""
    monkeypatch.delenv(
        "PYTHONPATH"
    )  # check that source_dir is also added to PYTHONPATH
    kedro_yml_path = fake_repo_path / ".kedro.yml"
    kedro_yml_path.write_text(
        f"context_path: fake_package.run.ProjectContext\nsource_dir: {source_dir}\n"
    )
    result = load_context(str(fake_repo_path))
    assert result.project_name == "Test Project"
    assert result.project_version == kedro.__version__
    assert str(fake_repo_path.resolve() / source_dir) in sys.path
def test_kedro_mlflow_config_setup_set_tracking_uri(mocker, tmp_path, config_dir):
    # Pretend "tmp_path" is the root of a kedro project
    # (in lieu of creating a real ".kedro.yml" file)
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)
    mlflow_tracking_uri = (tmp_path / "awesome_tracking").as_uri()
    # The config must set the mlflow tracking uri during setup
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="awesome_tracking",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert mlflow.get_tracking_uri() == mlflow_tracking_uri
def reload_kedro(path, line=None):
    """Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    try:
        import kedro.config.default_logger  # noqa: F401  # sets up default logging on import
        from kedro.context import KEDRO_ENV_VAR, load_context
        from kedro.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path
        logging.debug("Loading the context from %s", str(path))
        context = load_context(path, env=os.getenv(KEDRO_ENV_VAR))
        catalog = context.catalog

        # remove cached user modules
        package_name = context.__module__.split(".")[0]
        to_remove = [mod for mod in sys.modules if mod.startswith(package_name)]
        for module in to_remove:
            del sys.modules[module]

        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variables `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(line_magic)
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception(
            "Kedro's ipython session startup script failed:\n%s", str(err)
        )
        raise err
def list_datasets(pipeline, env):
    """Show datasets per type."""
    title = "DataSets in '{}' pipeline"
    not_mentioned = "Datasets not mentioned in pipeline"
    mentioned = "Datasets mentioned in pipeline"

    context = load_context(Path.cwd(), env=env)
    datasets_meta = context.catalog._data_sets
    catalog_ds = set(context.catalog.list())

    pipelines = pipeline or context.pipelines.keys()

    result = {}
    for pipeline in pipelines:
        pl_obj = context.pipelines.get(pipeline)
        if pl_obj:
            pipeline_ds = pl_obj.data_sets()
        else:
            existing_pls = ", ".join(sorted(context.pipelines.keys()))
            raise KedroCliError(
                "{} pipeline not found! Existing pipelines: {}".format(
                    pipeline, existing_pls
                )
            )

        unused_ds = catalog_ds - pipeline_ds
        default_ds = pipeline_ds - catalog_ds
        used_ds = catalog_ds - unused_ds

        unused_by_type = _map_type_to_datasets(unused_ds, datasets_meta)
        used_by_type = _map_type_to_datasets(used_ds, datasets_meta)

        if default_ds:
            used_by_type["DefaultDataSet"].extend(default_ds)

        data = ((not_mentioned, dict(unused_by_type)), (mentioned, dict(used_by_type)))
        result[title.format(pipeline)] = {key: value for key, value in data if value}

    secho(yaml.dump(result))
def main(
    tags: Iterable[str] = None,
    env: str = None,
    runner: Type[AbstractRunner] = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
):
    """Application main entry point.

    Args:
        tags: An optional list of node tags which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        env: An optional parameter specifying the environment in which the
            ``Pipeline`` should be run.
        runner: An optional parameter specifying the runner that you want to run
            the pipeline with.
        node_names: An optional list of node names which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes with these
            names will be run.
        from_nodes: An optional list of node names which should be used as a
            starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        from_inputs: An optional list of input datasets which should be used as a
            starting point of the new ``Pipeline``.
    """
    project_context = load_context(Path.cwd(), env=env)
    project_context.run(
        tags=tags,
        runner=runner,
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
    )
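# In the standard Kedro project template an entry point like this is invoked
# from run.py's module guard; a minimal sketch (the guard itself is an
# assumption here, not part of the snippet above):
if __name__ == "__main__":
    main()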
def test_kedro_mlflow_config_experiment_was_deleted(mocker, tmp_path, config_dir):
    # Pretend "tmp_path" is the root of a kedro project
    # (in lieu of creating a real ".kedro.yml" file)
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # Create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # The config must properly restore the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
def run_package():
    # Entry point for running pip-installed projects
    # using the `<project_package>` command
    project_context = load_context(Path.cwd())
    project_context.run()
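# A sketch of how `run_package` is typically exposed as the
# `<project_package>` console script via the project's setup.py. The package
# name "fake_package" and the script name are illustrative, not taken from
# the snippet above.
from setuptools import setup

setup(
    name="fake_package",
    entry_points={
        "console_scripts": ["fake-package = fake_package.run:run_package"]
    },
)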
for key in ["dag", "conf", "macros", "task", "task_instance", "ti", "var"]: del airflow_context[key] # drop unpicklable things data_catalog.add_feed_dict({"airflow_context": airflow_context}, replace=True) # or add just the ones you need into Kedro parameters parameters = data_catalog.load("parameters") parameters["airflow_ds"] = airflow_context["ds"] data_catalog.save("parameters", parameters) return data_catalog # Construct a DAG and then call into Kedro to have the operators constructed dag = DAG(slugify("kedro-airflow-mushrooms"), default_args=default_args, schedule_interval=timedelta(days=1), catchup=False) _context = load_context(project_path) data_catalog = _context.catalog pipeline = _context.pipeline runner = AirflowRunner( dag=dag, process_context=process_context, operator_arguments=operator_specific_arguments, ) runner.run(pipeline, data_catalog)
def test_valid_context(self, fake_repo_path):
    """Test getting project context."""
    result = load_context(str(fake_repo_path))
    assert result.project_name == "Test Project"
    assert result.project_version == kedro.__version__
    assert str(fake_repo_path.resolve() / "src") in sys.path