Example #1
    def test_invalid_path(self, tmp_path):
        """Test for loading context from an invalid path."""
        other_path = tmp_path / "other"
        other_path.mkdir()
        pattern = r"Could not find '\.kedro\.yml'"
        with pytest.raises(KedroContextError, match=pattern):
            load_context(str(other_path))
Example #2
    def test_kedro_yml_has_no_context_path(self, fake_repo_path):
        """Test for loading context when `.kedro.yml` is missing the
        required `context_path` field.
        """
        kedro_yml_path = fake_repo_path / ".kedro.yml"
        kedro_yml_path.write_text("fake_key: fake_value\nsource_dir: src\n")
        pattern = r"'\.kedro\.yml' doesn't have a required `context_path` field"
        with pytest.raises(KedroContextError, match=pattern):
            load_context(str(fake_repo_path))
Example #3
    def test_kedro_yml_invalid_format(self, fake_repo_path):
        """Test for loading context when `.kedro.yml` contains invalid YAML."""
        kedro_yml_path = fake_repo_path / ".kedro.yml"
        kedro_yml_path.write_text("!!")  # Invalid YAML
        pattern = r"Failed to parse '\.kedro\.yml' file"
        with pytest.raises(KedroContextError, match=pattern):
            load_context(str(fake_repo_path))
Example #4
    def test_source_path_does_not_exist(self, fake_repo_path):
        """Test for a valid source_dir pattern, but it does not exist.
        """
        kedro_yml_path = fake_repo_path / ".kedro.yml"
        source_dir = "non_existent"
        kedro_yml_path.write_text(
            "context_path: fake_package.run.ProjectContext\nsource_dir: {}\n".format(
                source_dir
            )
        )
        non_existent_path = (fake_repo_path / source_dir).expanduser().resolve()

        pattern = r"Source path '{}' cannot be found".format(non_existent_path)
        with pytest.raises(KedroContextError, match=pattern):
            load_context(str(fake_repo_path))
Example #5
def test_load(fake_project, tmp_path):
    """Test getting project context"""
    result = load_context(str(fake_project))
    assert result == "fake"
    assert str(fake_project.resolve()) in sys.path
    assert os.getcwd() == str(fake_project.resolve())

    other_path = tmp_path / "other"
    other_path.mkdir()
    pattern = (
        "Cannot load context for `{}`, since another project "
        "`.*` has already been loaded".format(other_path.resolve())
    )
    with pytest.raises(KedroContextError, match=pattern):
        load_context(str(other_path))
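Note that `load_context` binds the current process to a single project (it changes the working directory and `sys.path`), which is exactly what this test exercises. A minimal sketch of that constraint, with hypothetical project paths:

from kedro.context import load_context

context = load_context("/path/to/my_project")  # first call succeeds and pins the project
load_context("/path/to/other_project")         # raises KedroContextError in the same process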
Example #6
def reload_kedro(project_path, line=None):
    """"Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog
    try:
        import kedro.config.default_logger
        from kedro.context import load_context

        context = load_context(project_path)
        catalog = context.catalog
        logging.info("** Kedro project {}".format(context.project_name))

        logging.info("Defined global variable `context` and `catalog`")
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise
    except Exception as err:
        startup_error = err
        logging.error("Kedro's ipython session startup script failed:\n%s",
                      str(err))
        raise err
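For reference, a minimal sketch of how such a function is typically exposed as a line magic in an IPython startup script (the registration call appears in the later examples; the project path here is hypothetical):

from IPython.core.magic import register_line_magic

register_line_magic(reload_kedro)
# afterwards, inside the IPython session:
#   %reload_kedro /path/to/my_project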
Example #7
def test_kedro_mlflow_config_setup_tracking_priority(mocker, tmp_path,
                                                     config_dir):
    """Test if the mlflow_tracking uri set is the one of mlflow.yml
    if it also eist in credentials.

    Args:
        mocker ([type]): [description]
        tmp_path ([type]): [description]
        config_dir ([type]): [description]
    """
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    (tmp_path / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(mlflow_tracking_uri="mlruns2"))))

    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns1",
        credentials="my_mlflow_creds",
    )
    context = load_context(tmp_path)
    config.setup(context)

    assert mlflow.get_tracking_uri() == (tmp_path / "mlruns1").as_uri()
Example #8
def convert_notebook(all_flag, overwrite_flag, filepath):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `src/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    context = load_context(Path.cwd())

    if not filepath and not all_flag:
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    kedro_project_path = context.project_path
    kedro_package_name = "kedro_demo_feb2020"

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = kedro_project_path / "**" / "*.ipynb"
        notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]

    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        raise KedroCliError(
            "Found non-unique notebook names! "
            "Please rename the following: {}".format(", ".join(non_unique_names))
        )

    for notebook in notebooks:
        secho("Converting notebook '{}'...".format(str(notebook)))
        output_path = (
            kedro_project_path
            / "src"
            / kedro_package_name
            / "nodes"
            / "{}.py".format(notebook.stem)
        )

        if output_path.is_file():
            overwrite = overwrite_flag or click.confirm(
                "Output file {} already exists. Overwrite?".format(str(output_path)),
                default=False,
            )
            if overwrite:
                export_nodes(notebook, output_path)
        else:
            export_nodes(notebook, output_path)

    secho("Done!")
Example #9
def reload_kedro(path, line=None):
    """"Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global io

    try:
        import kedro.config.default_logger
        from kedro.context import load_context
        from kedro.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path
        logging.debug("Loading the context from %s", str(path))

        context = load_context(path)
        io = context.catalog
        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(line_magic)
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception("Kedro's ipython session startup script failed:\n%s",
                          str(err))
        raise err
Example #10
    def test_kedro_yml_invalid_source_dir_pattern(self, fake_repo_path,
                                                  source_dir):
        """Test for invalid pattern for source_dir that is not relative to the project path.
        """
        kedro_yml_path = fake_repo_path / ".kedro.yml"
        kedro_yml_path.write_text(
            f"context_path: fake_package.run.ProjectContext\nsource_dir: {source_dir}\n"
        )
        source_path = (fake_repo_path /
                       Path(source_dir).expanduser()).resolve()

        pattern = re.escape(
            f"Source path '{source_path}' has to be relative to your project root "
            f"'{fake_repo_path.resolve()}'")
        with pytest.raises(KedroContextError, match=pattern):
            load_context(str(fake_repo_path))
Example #11
def run(
    tag,
    env,
    parallel,
    runner,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    if parallel and runner:
        raise KedroCliError(
            "The --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    if parallel:
        runner = "ParallelRunner"
    runner_class = load_obj(runner, "kedro.runner") if runner else SequentialRunner

    context = load_context(Path.cwd(), env=env, extra_params=params)
    context.run(
        tags=tag,
        runner=runner_class(),
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
        load_versions=load_version,
        pipeline_name=pipeline,
    )
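For comparison, a minimal programmatic sketch of the same call without the CLI layer, using the load_context signature shown above (the env and extra_params values are hypothetical):

from pathlib import Path

from kedro.context import load_context
from kedro.runner import SequentialRunner

context = load_context(Path.cwd(), env="local", extra_params={"sample_size": 100})
context.run(runner=SequentialRunner())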
Example #12
    def test_valid_context(self, fake_project):
        """Test getting project context."""
        result = load_context(str(fake_project))
        assert result.project_name == "fake"
        assert result.project_version == __version__
        assert str(fake_project.resolve()) in sys.path
        assert os.getcwd() == str(fake_project.resolve())
Example #13
def get_project_context(key: Any,
                        default: Any = NO_DEFAULT) -> Any:  # pragma: no cover
    """Get a value from the project context.
    The user is responsible for having the specified key in their project's
    context, which is typically exposed in the ``__kedro_context__`` function
    in ``run.py``.

    Args:
        key: Key in Kedro context dictionary.
        default: Default value if the key is not found. If not provided
            and the key is not found, this will raise a ``KedroCliError``.

    Returns:
        Requested value from Kedro context dictionary or the default if the key
            was not found.

    Raises:
        KedroCliError: When the key is not found and the default value was not
            specified.
    """

    _KEDRO_CONTEXT.update(load_context(Path.cwd()))
    try:
        value = _KEDRO_CONTEXT[key]
    except KeyError:
        if default is not NO_DEFAULT:
            return default
        _handle_exception("`{}` not found in the context returned by "
                          "__get_kedro_context__".format(key))

    return deepcopy(value)
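A short usage sketch of this helper, showing the default-value behaviour described in the docstring (both keys here are hypothetical and depend on what the project's context exposes):

catalog = get_project_context("catalog")                      # raises KedroCliError if the key is absent
log_level = get_project_context("log_level", default="INFO")  # falls back to the default instead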
Example #14
    def test_valid_context_with_env(self, mocker, monkeypatch, fake_repo_path):
        """Test getting project context when the Kedro config environment is
        specified in the environment variable.
        """
        mocker.patch("kedro.config.config.ConfigLoader.get")
        monkeypatch.setenv("KEDRO_ENV", "my_fake_env")
        result = load_context(str(fake_repo_path))
        assert result.env == "my_fake_env"
Example #15
def get_project_context(key: str = "context", **kwargs) -> Any:
    """Gets the context value from context associated with the key.

    Args:
        key: Optional key to get associated value from Kedro context.
            Supported keys are "verbose" and "context"; it defaults to "context".
        kwargs: Optional custom arguments defined by users, which will be
            passed into the constructor of the project's KedroContext subclass.

    Returns:
        Requested value from Kedro context dictionary or the default if the key
            was not found.

    Raises:
        KedroCliError: When the key is not found and the default value was not
            specified.
    """
    def _deprecation_msg(key):
        msg_dict = {
            "get_config": ["config_loader", "ConfigLoader"],
            "create_catalog": ["catalog", "DataCatalog"],
            "create_pipeline": ["pipeline", "Pipeline"],
            "template_version": ["project_version", None],
            "project_name": ["project_name", None],
            "project_path": ["project_path", None],
        }
        attr, obj_name = msg_dict[key]
        msg = '`get_project_context("{}")` is now deprecated. '.format(key)
        if obj_name:
            msg += (
                "This is still returning a function that returns `{}` "
                "instance, however passed arguments have no effect anymore "
                "since Kedro 0.15.0. ".format(obj_name))
        msg += (
            "Please get `KedroContext` instance by calling `get_project_context()` "
            "and use its `{}` attribute.".format(attr))

        return msg

    context = load_context(Path.cwd(), **kwargs)
    # Dictionary to be compatible with existing Plugins. Future plugins should
    # retrieve necessary Kedro project properties from context
    value = {
        "context": context,
        "get_config":
        lambda project_path, env=None, **kw: context.config_loader,
        "create_catalog": lambda config, **kw: context.catalog,
        "create_pipeline": lambda **kw: context.pipeline,
        "template_version": context.project_version,
        "project_name": context.project_name,
        "project_path": context.project_path,
        "verbose": _VERBOSE,
    }[key]

    if key not in ("verbose", "context"):
        warnings.warn(_deprecation_msg(key), DeprecationWarning)

    return deepcopy(value)
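Given the deprecation warnings above, the supported pattern is to fetch the KedroContext itself and read its attributes; a minimal sketch:

context = get_project_context()   # equivalent to get_project_context("context")
catalog = context.catalog         # instead of the deprecated get_project_context("create_catalog")
project_name = context.project_name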
Example #16
    def test_valid_context(self, mocker, fake_repo_path):
        """Test getting project context."""
        # Disable logging.config.dictConfig in KedroContext._setup_logging as
        # it changes logging.config and affects other unit tests
        mocker.patch("logging.config.dictConfig")
        result = load_context(str(fake_repo_path))
        assert result.project_name == "Test Project"
        assert result.project_version == kedro.__version__
        assert str(fake_repo_path.resolve() / "src") in sys.path
        assert os.getcwd() == str(fake_repo_path.resolve())
Example #17
    def test_kedro_yml_missing_source_dir(self, fake_repo_path):
        """If source dir is missing (it is by default), `src` is used to import package
           due to backward compatibility.
        """
        kedro_yml_path = fake_repo_path / ".kedro.yml"
        kedro_yml_path.write_text("context_path: fake_package.run.ProjectContext\n")

        result = load_context(str(fake_repo_path))
        assert result.project_name == "Test Project"
        assert result.project_version == kedro.__version__
        assert str(fake_repo_path.resolve() / "src") in sys.path
Example #18
def test_kedro_mlflow_config_setup_export_credentials(mocker, tmp_path,
                                                      config_dir):
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    (tmp_path / "conf/base/credentials.yml").write_text(
        yaml.dump(dict(my_mlflow_creds=dict(fake_mlflow_cred="my_fake_cred"))))

    # the config must restore properly the experiment
    config = KedroMlflowConfig(project_path=tmp_path,
                               credentials="my_mlflow_creds")
    context = load_context(tmp_path)
    config.setup(context)

    assert os.environ["fake_mlflow_cred"] == "my_fake_cred"
Example #19
def test_kedro_mlflow_config_new_experiment_does_not_exists(
        mocker, tmp_path, config_dir):
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)

    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert "exp1" in [
        exp.name for exp in config.mlflow_client.list_experiments()
    ]
Example #20
async def predictor(model: str = 'rf_model'):
    """API endpoint for running a prediction with the given model.

    Outputs:
        row_index: int
        data: string
        predict: float
    """
    output = None  # avoid UnboundLocalError for unknown model names
    if model == "rf_model":
        context = load_context("")
        output = context.run(pipeline_name='predict_api')

    return output
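This coroutine's signature suggests it is served by an async web framework; a hedged wiring sketch, assuming FastAPI (the route and app names are hypothetical):

from fastapi import FastAPI

app = FastAPI()

@app.get("/predict")
async def predict_endpoint(model: str = "rf_model"):
    # delegate to the Kedro-backed predictor defined above
    return await predictor(model)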
Example #21
def reload_kedro(line=None):
    """"Line magic which reloads all Kedro default variables."""
    global proj_dir
    global proj_name
    global conf
    global io
    global startup_error
    try:
        import kedro.config.default_logger
        from kedro.context import load_context

        proj_name = "test-project"
        logging.info("** Kedro project {}".format(proj_name))

        project_context = load_context(proj_dir)

        conf = project_context["get_config"](proj_dir)
        io = project_context["create_catalog"](conf)

        logging.info(
            "Defined global variables proj_dir, proj_name, conf and io")
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment.")
        raise
    except KeyError as err:
        startup_error = err
        if "create_catalog" in str(err):
            message = ("The function `create_catalog` is missing from "
                       "test-project/src/"
                       "test_project/run.py."
                       "\nEither restore this function, or update "
                       "test-project/"
                       ".ipython/profile_default/startup/00-kedro-init.py.")
        elif "get_config" in str(err):
            message = ("The function `get_config` is missing from "
                       "test-project/src/"
                       "test_project/run.py."
                       "\nEither restore this function, or update "
                       "test-project/"
                       ".ipython/profile_default/startup/00-kedro-init.py.")
        logging.error(message)
        raise err
    except Exception as err:
        startup_error = err
        logging.error("Kedro's ipython session startup script failed:\n%s",
                      str(err))
        raise err
Example #22
    def test_kedro_yml_valid_source_dir(self, mocker, monkeypatch,
                                        fake_repo_path, source_dir):
        """Test for loading context from an valid source dir. """
        monkeypatch.delenv(
            "PYTHONPATH"
        )  # test we are also adding source_dir to PYTHONPATH as well

        kedro_yml_path = fake_repo_path / ".kedro.yml"
        kedro_yml_path.write_text(
            f"context_path: fake_package.run.ProjectContext\nsource_dir: {source_dir}\n"
        )

        result = load_context(str(fake_repo_path))
        assert result.project_name == "Test Project"
        assert result.project_version == kedro.__version__
        assert str(fake_repo_path.resolve() / source_dir) in sys.path
Example #23
def test_kedro_mlflow_config_setup_set_tracking_uri(mocker, tmp_path,
                                                    config_dir):
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "awesome_tracking").as_uri()

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="awesome_tracking",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)

    assert mlflow.get_tracking_uri() == mlflow_tracking_uri
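The final assertion implies that a relative mlflow_tracking_uri is resolved against project_path before being handed to mlflow; a minimal illustration of that resolution, with a hypothetical project path:

from pathlib import Path

project_path = Path("/tmp/my_project")                       # hypothetical
tracking_uri = (project_path / "awesome_tracking").as_uri()
# -> "file:///tmp/my_project/awesome_tracking", which is what mlflow.get_tracking_uri() returns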
Example #24
def reload_kedro(path, line=None):
    """"Line magic which reloads all Kedro default variables."""
    global startup_error
    global context
    global catalog

    try:
        import kedro.config.default_logger
        from kedro.context import KEDRO_ENV_VAR, load_context
        from kedro.cli.jupyter import collect_line_magic
    except ImportError:
        logging.error(
            "Kedro appears not to be installed in your current environment "
            "or your current IPython session was not started in a valid Kedro project."
        )
        raise

    try:
        path = path or project_path
        logging.debug("Loading the context from %s", str(path))

        context = load_context(path, env=os.getenv(KEDRO_ENV_VAR))
        catalog = context.catalog

        # remove cached user modules
        package_name = context.__module__.split(".")[0]
        to_remove = [
            mod for mod in sys.modules if mod.startswith(package_name)
        ]
        for module in to_remove:
            del sys.modules[module]

        logging.info("** Kedro project %s", str(context.project_name))
        logging.info("Defined global variable `context` and `catalog`")

        for line_magic in collect_line_magic():
            register_line_magic(line_magic)
            logging.info("Registered line magic `%s`", line_magic.__name__)
    except Exception as err:
        startup_error = err
        logging.exception("Kedro's ipython session startup script failed:\n%s",
                          str(err))
        raise err
Example #25
def list_datasets(pipeline, env):
    """Show datasets per type."""
    title = "DataSets in '{}' pipeline"
    not_mentioned = "Datasets not mentioned in pipeline"
    mentioned = "Datasets mentioned in pipeline"

    context = load_context(Path.cwd(), env=env)
    datasets_meta = context.catalog._data_sets
    catalog_ds = set(context.catalog.list())

    pipelines = pipeline or context.pipelines.keys()

    result = {}
    for pipeline in pipelines:
        pl_obj = context.pipelines.get(pipeline)
        if pl_obj:
            pipeline_ds = pl_obj.data_sets()
        else:
            existing_pls = ", ".join(sorted(context.pipelines.keys()))
            raise KedroCliError(
                "{} pipeline not found! Existing pipelines: {}".format(
                    pipeline, existing_pls))

        unused_ds = catalog_ds - pipeline_ds
        default_ds = pipeline_ds - catalog_ds
        used_ds = catalog_ds - unused_ds

        unused_by_type = _map_type_to_datasets(unused_ds, datasets_meta)
        used_by_type = _map_type_to_datasets(used_ds, datasets_meta)

        if default_ds:
            used_by_type["DefaultDataSet"].extend(default_ds)

        data = (
            (not_mentioned, dict(unused_by_type)),
            (mentioned, dict(used_by_type)),
        )
        result[title.format(pipeline)] = {
            key: value for key, value in data if value
        }

    secho(yaml.dump(result))
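Roughly, the `result` dictionary handed to yaml.dump looks like the sketch below (the pipeline and dataset names are hypothetical):

result = {
    "DataSets in '__default__' pipeline": {
        "Datasets mentioned in pipeline": {"CSVDataSet": ["example_iris_data"]},
        "Datasets not mentioned in pipeline": {"MemoryDataSet": ["scratch_table"]},
    },
}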
Example #26
def main(
    tags: Iterable[str] = None,
    env: str = None,
    runner: Type[AbstractRunner] = None,
    node_names: Iterable[str] = None,
    from_nodes: Iterable[str] = None,
    to_nodes: Iterable[str] = None,
    from_inputs: Iterable[str] = None,
):
    """Application main entry point.

    Args:
        tags: An optional list of node tags which should be used to
            filter the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be run.
        env: An optional parameter specifying the environment in which
            the ``Pipeline`` should be run.
        runner: An optional parameter specifying the runner that you want to run
            the pipeline with.
        node_names: An optional list of node names which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes with these
            names will be run.
        from_nodes: An optional list of node names which should be used as a
            starting point of the new ``Pipeline``.
        to_nodes: An optional list of node names which should be used as an
            end point of the new ``Pipeline``.
        from_inputs: An optional list of input datasets which should be used as a
            starting point of the new ``Pipeline``.

    """

    project_context = load_context(Path.cwd(), env=env)
    project_context.run(
        tags=tags,
        runner=runner,
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
    )
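This is the run.py entry point of the project template; it is typically invoked from a guard at the bottom of the module, as in the sketch below:

if __name__ == "__main__":
    main()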
Example #27
def test_kedro_mlflow_config_experiment_was_deleted(mocker, tmp_path,
                                                    config_dir):
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    context = load_context(tmp_path)
    config.setup(context)
    assert "exp1" in [
        exp.name for exp in config.mlflow_client.list_experiments()
    ]
Example #28
def run_package():
    # entry point for running pip-installed projects
    # using `<project_package>` command
    project_context = load_context(Path.cwd())
    project_context.run()
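A hedged sketch of how such an entry point is usually declared in the packaged project's setup.py (the package and command names are hypothetical):

from setuptools import setup

setup(
    name="my-project",  # hypothetical
    entry_points={
        "console_scripts": ["my-project = my_project.run:run_package"],
    },
)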
Example #29
    for key in ["dag", "conf", "macros", "task", "task_instance", "ti", "var"]:
        del airflow_context[key]  # drop unpicklable things
    data_catalog.add_feed_dict({"airflow_context": airflow_context},
                               replace=True)

    # or add just the ones you need into Kedro parameters
    parameters = data_catalog.load("parameters")
    parameters["airflow_ds"] = airflow_context["ds"]
    data_catalog.save("parameters", parameters)

    return data_catalog


# Construct a DAG and then call into Kedro to have the operators constructed
dag = DAG(slugify("kedro-airflow-mushrooms"),
          default_args=default_args,
          schedule_interval=timedelta(days=1),
          catchup=False)

_context = load_context(project_path)
data_catalog = _context.catalog
pipeline = _context.pipeline

runner = AirflowRunner(
    dag=dag,
    process_context=process_context,
    operator_arguments=operator_specific_arguments,
)

runner.run(pipeline, data_catalog)
Example #30
    def test_valid_context(self, fake_repo_path):
        """Test getting project context."""
        result = load_context(str(fake_repo_path))
        assert result.project_name == "Test Project"
        assert result.project_version == kedro.__version__
        assert str(fake_repo_path.resolve() / "src") in sys.path