Example #1
def _reformat_load_versions(  # pylint: disable=unused-argument
    ctx, param, value
) -> Dict[str, str]:
    """Reformat data structure from tuple to dictionary for `load-version`.
        E.g ('dataset1:time1', 'dataset2:time2') -> {"dataset1": "time1", "dataset2": "time2"}.
    """
    load_versions_dict = {}

    for load_version in value:
        load_version_list = load_version.split(":", 1)
        if len(load_version_list) != 2:
            raise KedroCliError(
                f"Expected the form of `load_version` to be "
                f"`dataset_name:YYYY-MM-DDThh.mm.ss.sssZ`,"
                f"found {load_version} instead"
            )
        load_versions_dict[load_version_list[0]] = load_version_list[1]

    return load_versions_dict
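For context, a minimal sketch of how such a callback is typically attached to a click option (the option wiring below is illustrative, not taken from the source):

import click

@click.command()
@click.option(
    "--load-version",
    "load_version",
    multiple=True,  # click collects repeated flags into a tuple for the callback
    callback=_reformat_load_versions,
    help="Specify a particular dataset version (timestamp) for loading.",
)
def demo(load_version):
    # by the time the command body runs, the value is already a dict
    click.echo(load_version)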
Example #2
def run(
    tag,
    env,
    parallel,
    runner,
    is_async,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    to_outputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    if parallel and runner:
        raise KedroCliError(
            "Both --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    runner = runner or "SequentialRunner"
    if parallel:
        runner = "ParallelRunner"
    runner_class = load_obj(runner, "kedro.runner")

    tag = _get_values_as_tuple(tag) if tag else tag
    node_names = _get_values_as_tuple(node_names) if node_names else node_names

    package_name = str(Path(__file__).resolve().parent.name)
    with KedroSession.create(package_name, env=env, extra_params=params) as session:
        session.run(
            tags=tag,
            runner=runner_class(is_async=is_async),
            node_names=node_names,
            from_nodes=from_nodes,
            to_nodes=to_nodes,
            from_inputs=from_inputs,
            to_outputs=to_outputs,
            load_versions=load_version,
            pipeline_name=pipeline,
        )
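The helper `_get_values_as_tuple` used above (and in several examples below) is not shown. A plausible minimal sketch, assuming it flattens comma-separated CLI values into one tuple:

from itertools import chain
from typing import Iterable, Tuple

def _get_values_as_tuple(values: Iterable[str]) -> Tuple[str, ...]:
    # "--tag a,b --tag c" ends up as ("a", "b", "c")
    return tuple(chain.from_iterable(value.split(",") for value in values))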
Example #3
def describe_pipeline(metadata: ProjectMetadata, name, env, **kwargs):  # pylint: disable=unused-argument
    """Describe a pipeline by providing the pipeline name as an argument."""
    session = _create_session(metadata.package_name, env=env)
    context = session.load_context()
    pipeline_obj = context.pipelines.get(name)
    if not pipeline_obj:
        existing_pipelines = ", ".join(sorted(context.pipelines.keys()))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    result = {
        "Nodes": [
            f"{node.short_name} ({node._func_name})"  # pylint: disable=protected-access
            for node in pipeline_obj.nodes
        ]
    }

    click.echo(yaml.dump(result))
Example #4
def list_datasets(metadata: ProjectMetadata, pipeline, env):
    """Show datasets per type."""
    title = "DataSets in '{}' pipeline"
    not_mentioned = "Datasets not mentioned in pipeline"
    mentioned = "Datasets mentioned in pipeline"

    session = _create_session(metadata.package_name, env=env)
    context = session.load_context()
    datasets_meta = context.catalog._data_sets  # pylint: disable=protected-access
    catalog_ds = set(context.catalog.list())

    pipelines = pipeline or context.pipelines.keys()

    result = {}
    for pipe in pipelines:
        pl_obj = context.pipelines.get(pipe)
        if pl_obj:
            pipeline_ds = pl_obj.data_sets()
        else:
            existing_pls = ", ".join(sorted(context.pipelines.keys()))
            raise KedroCliError(
                f"`{pipe}` pipeline not found! Existing pipelines: {existing_pls}"
            )

        unused_ds = catalog_ds - pipeline_ds
        default_ds = pipeline_ds - catalog_ds
        used_ds = catalog_ds - unused_ds

        unused_by_type = _map_type_to_datasets(unused_ds, datasets_meta)
        used_by_type = _map_type_to_datasets(used_ds, datasets_meta)

        if default_ds:
            used_by_type["DefaultDataSet"].extend(default_ds)

        data = ((not_mentioned, dict(unused_by_type)), (mentioned,
                                                        dict(used_by_type)))
        result[title.format(pipe)] = {
            key: value
            for key, value in data if value
        }

    secho(yaml.dump(result))
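`_map_type_to_datasets` is referenced but not shown. A hedged sketch, assuming it groups dataset names by the class name of their catalog entry and skips parameters:

from collections import defaultdict

def _map_type_to_datasets(datasets, datasets_meta):
    mapping = defaultdict(list)
    for dataset in datasets:
        # parameters are not datasets, so they are excluded from the grouping
        if not dataset.startswith("params:") and dataset != "parameters":
            ds_type = datasets_meta[dataset].__class__.__name__
            mapping[ds_type].append(dataset)
    return mapping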
Example #5
def _unpack_wheel(location: str, destination: Path, fs_args: Optional[str]) -> None:
    filesystem = _get_fsspec_filesystem(location, fs_args)

    if location.endswith(".whl") and filesystem and filesystem.exists(location):
        with filesystem.open(location) as fs_file:
            ZipFile(fs_file).extractall(destination)
    else:
        python_call(
            "pip", ["download", "--no-deps", "--dest", str(destination), location]
        )
        wheel_file = list(destination.glob("*.whl"))
        # `--no-deps` should fetch only one wheel file, and CLI should fail if that's
        # not the case.
        if len(wheel_file) != 1:
            file_names = [wf.name for wf in wheel_file]
            raise KedroCliError(
                f"More than 1 or no wheel files found: {file_names}. "
                f"There has to be exactly one distribution file."
            )
        ZipFile(wheel_file[0]).extractall(destination)
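`_get_fsspec_filesystem` is assumed to resolve an `fsspec` filesystem from the location's protocol; loading `fs_args` as a YAML file is also an assumption. A minimal sketch:

from typing import Optional

import fsspec
import yaml

def _get_fsspec_filesystem(location: str, fs_args: Optional[str]):
    protocol = location.split("://", 1)[0] if "://" in location else "file"
    fs_args_config = {}
    if fs_args:
        with open(fs_args) as config_file:
            fs_args_config = yaml.safe_load(config_file) or {}
    try:
        return fsspec.filesystem(protocol, **fs_args_config)
    except Exception:  # protocol unsupported by fsspec or missing extras
        return None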
Example #6
def describe_pipeline(
    metadata: ProjectMetadata, name, **kwargs
):  # pylint: disable=unused-argument
    """Describe a pipeline by providing a pipeline name."""
    pipeline_obj = pipelines.get(name)
    if not pipeline_obj:
        all_pipeline_names = pipelines.keys()
        existing_pipelines = ", ".join(sorted(all_pipeline_names))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    result = {
        "Nodes": [
            f"{node.short_name} ({node._func_name})"  # pylint: disable=protected-access
            for node in pipeline_obj.nodes
        ]
    }

    click.echo(yaml.dump(result))
Example #7
def describe_registered_pipeline(metadata: ProjectMetadata, name, **kwargs):  # pylint: disable=unused-argument, protected-access
    """Describe a registered pipeline by providing a pipeline name.
    Defaults to the `__default__` pipeline.
    """
    pipeline_obj = pipelines.get(name)
    if not pipeline_obj:
        all_pipeline_names = pipelines.keys()
        existing_pipelines = ", ".join(sorted(all_pipeline_names))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    nodes = []
    for node in pipeline_obj.nodes:
        namespace = f"{node.namespace}." if node.namespace else ""
        nodes.append(
            f"{namespace}{node._name or node._func_name} ({node._func_name})")
    result = {"Nodes": nodes}

    click.echo(yaml.dump(result))
Example #8
def _find_run_command(package_name):
    try:
        project_cli = importlib.import_module(f"{package_name}.cli")
        # fail gracefully if cli.py does not exist
    except ModuleNotFoundError as exc:
        if f"{package_name}.cli" not in str(exc):
            raise
        plugins = load_entry_points("project")
        run = _find_run_command_in_plugins(plugins) if plugins else None
        if run:
            # use run command from installed plugin if it exists
            return run
        # use run command from the framework project
        from kedro.framework.cli.project import run

        return run
    # fail badly if cli.py exists, but has no `cli` in it
    if not hasattr(project_cli, "cli"):
        raise KedroCliError(f"Cannot load commands from {package_name}.cli")
    return project_cli.run
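A plausible sketch of the `_find_run_command_in_plugins` helper used above, assuming each plugin entry point is a click command group:

def _find_run_command_in_plugins(plugins):
    # return the first `run` subcommand exposed by an installed plugin group
    for group in plugins:
        if "run" in group.commands:
            return group.commands["run"]
    return None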
Example #9
def run(
    tag,
    env,
    parallel,
    streaming,
    runner,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    if parallel and runner:
        raise KedroCliError(
            "Both --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    runner = runner or "SequentialRunner"
    if parallel:
        runner = "ParallelRunner"
    runner_class = load_obj(runner, "kedro.runner")
    if streaming:
        runner_class = load_obj("src.runner.StreamingRunner")

    tag = _get_values_as_tuple(tag) if tag else tag
    node_names = _get_values_as_tuple(node_names) if node_names else node_names

    context = load_context(Path.cwd(), env=env, extra_params=params)
    context.run(
        tags=tag,
        runner=runner_class(),
        node_names=node_names,
        from_nodes=from_nodes,
        to_nodes=to_nodes,
        from_inputs=from_inputs,
        load_versions=load_version,
        pipeline_name=pipeline,
    )
Example #10
def run(
    tag,
    env,
    parallel,
    runner,
    is_async,
    node_names,
    to_nodes,
    from_nodes,
    from_inputs,
    load_version,
    pipeline,
    config,
    params,
):
    """Run the pipeline."""
    if parallel and runner:
        raise KedroCliError(
            "Both --parallel and --runner options cannot be used together. "
            "Please use either --parallel or --runner."
        )
    runner = runner or "SequentialRunner"
    if parallel:
        runner = "ParallelRunner"
    runner_class = load_obj(runner, "kedro.runner")

    tag = _get_values_as_tuple(tag) if tag else tag
    node_names = _get_values_as_tuple(node_names) if node_names else node_names

    with KedroSession.create(project_path=Path.cwd(),
                             env=env,
                             extra_params=params) as session:
        session.run(
            tags=tag,
            runner=runner_class(is_async=is_async),
            node_names=node_names,
            from_nodes=from_nodes,
            to_nodes=to_nodes,
            from_inputs=from_inputs,
            load_versions=load_version,
            pipeline_name=pipeline,
        )
Example #11
def load_entry_points(name: str) -> List[str]:
    """Load package entry point commands.

    Args:
        name: The key value specified in ENTRY_POINT_GROUPS.

    Raises:
        KedroCliError: If loading an entry point failed.

    Returns:
        List of entry point commands.

    """
    entry_points = pkg_resources.iter_entry_points(group=ENTRY_POINT_GROUPS[name])
    entry_point_commands = []
    for entry_point in entry_points:
        try:
            entry_point_commands.append(entry_point.load())
        except Exception as exc:
            raise KedroCliError(f"Loading {name} commands from {entry_point}") from exc
    return entry_point_commands
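For reference, `ENTRY_POINT_GROUPS` maps short keys to setuptools entry-point group names. A sketch of its likely shape (exact group names may vary between Kedro versions):

ENTRY_POINT_GROUPS = {
    "global": "kedro.global_commands",
    "project": "kedro.project_commands",
    "init": "kedro.init",
    "line_magic": "kedro.line_magic",
}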
Example #12
def pull_package(
    metadata: ProjectMetadata, package_path, env, alias, fs_args, **kwargs
):  # pylint:disable=unused-argument
    """Pull and unpack a modular pipeline in your project."""

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir).resolve()

        _unpack_wheel(package_path, temp_dir_path, fs_args)

        dist_info_file = list(temp_dir_path.glob("*.dist-info"))
        if len(dist_info_file) != 1:
            raise KedroCliError(
                f"More than 1 or no dist-info files found from {package_path}. "
                f"There has to be exactly one dist-info directory.")
        # Extract package name, based on the naming convention for wheel files
        # https://www.python.org/dev/peps/pep-0427/#file-name-convention
        package_name = dist_info_file[0].stem.split("-")[0]

        _clean_pycache(temp_dir_path)
        _install_files(metadata, package_name, temp_dir_path, env, alias)
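`_clean_pycache` is not shown; a minimal sketch, assuming it strips `__pycache__` directories from the unpacked package tree:

import shutil
from pathlib import Path

def _clean_pycache(path: Path) -> None:
    # materialise the generator first, since we delete while walking
    for pycache in list(path.rglob("__pycache__")):
        shutil.rmtree(pycache, ignore_errors=True)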
Example #13
def lint(files, check_only):
    """Run flake8, isort and (on Python >=3.6) black."""
    files = files or (
        str(SOURCE_PATH / "tests"),
        str(SOURCE_PATH / KEDRO_PACKAGE_NAME),
    )

    try:
        import flake8
        import isort
        import black
    except ImportError as exc:
        raise KedroCliError(
            NO_DEPENDENCY_MESSAGE.format(module=exc.name,
                                         src=str(SOURCE_PATH)))

    python_call("black", ("--check", ) + files if check_only else files)
    python_call("flake8", ("--max-line-length=88", ) + files)

    check_flag = ("-c", ) if check_only else ()
    python_call("isort",
                (*check_flag, "-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") +
                files)
Example #14
def delete_pipeline(name, env, yes):
    """Delete a modular pipeline by providing the pipeline name as an argument."""
    try:
        context = load_context(Path.cwd(), env=env)
    except Exception as err:  # pylint: disable=broad-except
        _handle_exception(
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}")

    package_dir = _get_project_package_dir(context)

    env = env or "base"
    pipeline_artifacts = _get_pipeline_artifacts(context,
                                                 pipeline_name=name,
                                                 env=env)
    dirs = [path for path in pipeline_artifacts if path.is_dir()]

    if not yes:
        click.echo(
            "The following directories and everything within them will be removed:\n"
        )
        click.echo(indent("\n".join(str(dir_) for dir_ in dirs), " " * 2))
        click.echo()
        yes = click.confirm(
            f"Are you sure you want to delete pipeline `{name}`?")
        click.echo()

    if not yes:
        raise KedroCliError("Deletion aborted!")

    _delete_dirs(*dirs)
    click.secho(f"\nPipeline `{name}` was successfully deleted.\n", fg="green")
    click.secho(
        f"If you added the pipeline `{name}` to `create_pipelines()` in "
        f"`{package_dir / 'pipeline.py'}`, you will need to remove it.",
        fg="yellow",
    )
Example #15
def _parse_config_from_file(config_path: str) -> Dict[str, str]:
    """Parses the config YAML from its path.

    Args:
        config_path: The path of the config.yml file.

    Raises:
        KedroCliError: If the file cannot be parsed.

    Returns:
        The config as a dictionary.
    """
    try:
        with open(config_path, "r") as config_file:
            config = yaml.safe_load(config_file)

        if KedroCliError.VERBOSE_ERROR:
            click.echo(config_path + ":")
            click.echo(yaml.dump(config, default_flow_style=False))
    except Exception as exc:
        _show_example_config()
        raise KedroCliError(f"Failed to parse {config_path}.") from exc

    return config
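A usage sketch (illustrative): the keys below mirror the parameters that the `kedro new` docstrings further down say a config file must contain:

config = _parse_config_from_file("config.yml")
project_name = config["project_name"]      # e.g. "My Project"
python_package = config["python_package"]  # e.g. "my_project"
output_dir = config["output_dir"]          # parent directory of the new project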
Example #16
def lint(
    metadata: ProjectMetadata, files, check_only, **kwargs
):  # pylint: disable=unused-argument
    """Run flake8, isort and black."""
    source_path = metadata.source_dir
    package_name = metadata.package_name
    files = files or (str(source_path / "tests"), str(source_path / package_name))

    if "PYTHONPATH" not in os.environ:
        # isort needs the source path to be in the 'PYTHONPATH' environment
        # variable to treat it as a first-party import location
        os.environ["PYTHONPATH"] = str(source_path)  # pragma: no cover

    for module_name in ("flake8", "isort", "black"):
        try:
            _check_module_importable(module_name)
        except KedroCliError as exc:
            raise KedroCliError(
                NO_DEPENDENCY_MESSAGE.format(module=module_name, src=str(source_path))
            ) from exc

    python_call("black", ("--check",) + files if check_only else files)
    python_call("flake8", files)
    python_call("isort", ("--check",) + files if check_only else files)
Example #17
def new(config, starter_name, checkout):
    """Create a new kedro project, either interactively or from a
    configuration file.

    Create projects according to the Kedro default project template. This
    template is ideal for analytics projects and comes with a data
    architecture, folders for notebooks, configuration, source code, etc.

    \b
    ``kedro new``
    Create a new project interactively.

    \b
    You will have to provide four choices:
    * ``Project Name`` - name of the project, not to be confused with name of
    the project folder.
    * ``Repository Name`` - intended name of your project folder.
    * ``Package Name`` - intended name of your Python package.
    * ``Generate Example Pipeline`` - yes/no to generating an example pipeline
    in your project.

    \b
    ``kedro new --config <config.yml>``
    ``kedro new -c <config.yml>``
    Create a new project from configuration.

    * ``config.yml`` - The configuration YAML must contain at the top level
                    the above parameters (project_name, repo_name,
                    python_package, include_example) and output_dir - the
                    parent directory for the new project directory.

    \b
    ``kedro new --starter <starter>``
    Create a new project from a starter template. The starter can be either the path to
    a local directory, a URL to a remote VCS repository supported by `cookiecutter` or
    one of the aliases listed in ``kedro starter list``.

    \b
    ``kedro new --starter <starter> --checkout <checkout>``
    Create a new project from a starter template and a particular tag, branch or commit
    in the starter repository.

    """
    if checkout and not starter_name:
        raise KedroCliError(
            "Cannot use the --checkout flag without a --starter value.")

    if starter_name:
        template_path = _STARTER_ALIASES.get(starter_name, starter_name)
        should_prompt_for_example = False
    else:
        template_path = TEMPLATE_PATH
        should_prompt_for_example = True

    _create_project(
        config_path=config,
        verbose=_VERBOSE,
        template_path=template_path,
        should_prompt_for_example=should_prompt_for_example,
        checkout=checkout,
    )
Example #18
def _create_project(
    config_path: str,
    template_path: Path = TEMPLATE_PATH,
    checkout: str = None,
    directory: str = None,
):  # pylint: disable=too-many-locals
    """Implementation of the kedro new cli command.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        template_path: The path to the cookiecutter template to create the project.
            It could either be a local directory or a remote VCS repository
            supported by cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        checkout: The tag, branch or commit in the starter repository to checkout.
            Maps directly to cookiecutter's --checkout argument.
            If the value is not provided, cookiecutter will use the installed Kedro version
            by default.
        directory: The directory of a specific starter inside a repository containing
            multiple starters. Map directly to cookiecutter's --directory argument.
            https://cookiecutter.readthedocs.io/en/1.7.2/advanced/directories.html
    Raises:
        KedroCliError: If it fails to generate a project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons
        from cookiecutter.repository import determine_repo_dir

    config: Dict[str, str] = dict()
    checkout = checkout or version
    try:
        if config_path:
            config = _parse_config(config_path)
            config = _check_config_ok(config_path, config)
        else:
            with tempfile.TemporaryDirectory() as tmpdir:
                temp_dir_path = Path(tmpdir).resolve()
                repo, _ = determine_repo_dir(
                    template=str(template_path),
                    abbreviations=dict(),
                    clone_to_dir=temp_dir_path,
                    checkout=checkout,
                    no_input=True,
                    directory=directory,
                )
                config_yml = temp_dir_path / repo / "starter_config.yml"
                if config_yml.is_file():
                    with open(config_yml) as config_file:
                        prompts = yaml.safe_load(config_file)
                    config = _get_config_from_starter_prompts(prompts)
        config.setdefault("kedro_version", version)

        cookiecutter_args = dict(
            output_dir=config.get("output_dir", str(Path.cwd().resolve())),
            no_input=True,
            extra_context=config,
            checkout=checkout,
        )
        if directory:
            cookiecutter_args["directory"] = directory
        result_path = Path(cookiecutter(str(template_path), **cookiecutter_args))
        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort as exc:  # pragma: no cover
        raise KedroCliError("User interrupt.") from exc
    except RepositoryNotFound as exc:
        raise KedroCliError(
            f"Kedro project template not found at {template_path}"
        ) from exc
    except RepositoryCloneFailed as exc:
        error_message = (
            f"Kedro project template not found at {template_path} with tag {checkout}."
        )
        tags = _get_available_tags(str(template_path).replace("git+", ""))
        if tags:
            error_message += (
                f" The following tags are available: {', '.join(tags)}"
            )
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc
Example #19
def new(
    config, starter_name, checkout, directory, **kwargs
):  # pylint: disable=unused-argument
    """Create a new kedro project, either interactively or from a
    configuration file.

    Create projects according to the Kedro default project template. This
    template is ideal for analytics projects and comes with a data
    architecture, folders for notebooks, configuration, source code, etc.

    \b
    ``kedro new``
    Create a new project interactively.

    \b
    You will have to provide four choices:
    * ``Project Name`` - name of the project, not to be confused with name of
    the project folder.
    * ``Repository Name`` - intended name of your project folder.
    * ``Package Name`` - intended name of your Python package.
    * ``Generate Example Pipeline`` - yes/no to generating an example pipeline
    in your project.

    \b
    ``kedro new --config <config.yml>``
    ``kedro new -c <config.yml>``
    Create a new project from configuration.

    * ``config.yml`` - The configuration YAML must contain at the top level
                    the above parameters (project_name, repo_name,
                    python_package) and output_dir - the
                    parent directory for the new project directory.

    \b
    ``kedro new --starter <starter>``
    Create a new project from a starter template. The starter can be either the path to
    a local directory, a URL to a remote VCS repository supported by `cookiecutter` or
    one of the aliases listed in ``kedro starter list``.

    \b
    ``kedro new --starter <starter> --checkout <checkout>``
    Create a new project from a starter template and a particular tag, branch or commit
    in the starter repository.

    \b
    ``kedro new --starter <starter> --directory <directory>``
    Create a new project from a starter repository and a directory within the location.
    Useful when you have multiple starters in the same repository.
    """
    if checkout and not starter_name:
        raise KedroCliError("Cannot use the --checkout flag without a --starter value.")

    if directory and not starter_name:
        raise KedroCliError(
            "Cannot use the --directory flag without a --starter value."
        )

    if starter_name in _STARTER_ALIASES:
        if directory:
            raise KedroCliError(
                "Cannot use the --directory flag with a --starter alias."
            )
        template_path = _STARTERS_REPO
        directory = starter_name
    elif starter_name is not None:
        template_path = starter_name
    else:
        template_path = TEMPLATE_PATH

    _create_project(
        config_path=config,
        template_path=template_path,
        checkout=checkout,
        directory=directory,
    )
Example #20
def _port_callback(ctx, param, value):  # pylint: disable=unused-argument
    if is_port_in_use(value):
        raise KedroCliError(f"Port {value} is already in use on the host. "
                            f"Please specify an alternative port number.")
    return value
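`is_port_in_use` is assumed to probe the port with a socket; the host and probing method below are illustrative:

import socket

def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # connect_ex returns 0 when something is already listening on the port
        return sock.connect_ex((host, port)) == 0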
Example #21
def _assert_include_example_ok(include_example):
    if not isinstance(include_example, bool):
        message = (
            "`{}` value for `include_example` is invalid. It must be a boolean value "
            "True or False.".format(include_example))
        raise KedroCliError(message)
Example #22
def convert_notebook(
    metadata: ProjectMetadata, all_flag, overwrite_flag, filepath, env, **kwargs
):  # pylint: disable=unused-argument, too-many-locals
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `<source_dir>/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    project_path = metadata.project_path
    source_path = metadata.source_dir
    package_name = metadata.package_name

    _update_ipython_dir(project_path)

    if not filepath and not all_flag:
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = project_path / "**" / "*.ipynb"
        notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]

    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        names = ", ".join(non_unique_names)
        raise KedroCliError(
            f"Found non-unique notebook names! Please rename the following: {names}"
        )

    output_dir = source_path / package_name / "nodes"
    if not output_dir.is_dir():
        output_dir.mkdir()
        (output_dir / "__init__.py").touch()

    for notebook in notebooks:
        secho(f"Converting notebook '{notebook}'...")
        output_path = output_dir / f"{notebook.stem}.py"

        if output_path.is_file():
            overwrite = overwrite_flag or click.confirm(
                f"Output file {output_path} already exists. Overwrite?", default=False
            )
            if overwrite:
                _export_nodes(notebook, output_path)
        else:
            _export_nodes(notebook, output_path)

    secho("Done!", color="green")  # type: ignore
Example #23
def _validate_dir(path: Path) -> None:
    if not path.is_dir():
        raise KedroCliError(f"Directory '{path}' doesn't exist.")
    if not list(path.iterdir()):
        raise KedroCliError(f"'{path}' is an empty directory.")
Example #24
def compose_docker_run_args(
    host_root: str = None,
    container_root: str = None,
    mount_volumes: Sequence[str] = None,
    required_args: Sequence[Tuple[str, Union[str, None]]] = None,
    optional_args: Sequence[Tuple[str, Union[str, None]]] = None,
    user_args: Sequence[str] = None,
) -> List[str]:
    """
    Make a list of arguments for the docker command.

    Args:
        host_root: Path project root on the host. It must be provided
            if `mount_volumes` are specified, optional otherwise.
        container_root: Path to project root in the container
            (e.g., `/home/kedro/<repo_name>`). It must be
            provided if `mount_volumes` are specified, optional otherwise.
        mount_volumes: List of volumes to be mounted.
        required_args: List of required arguments.
        optional_args: List of optional arguments; these will be added only
            if not already present in the `user_args` list.
        user_args: List of arguments already specified by the user.
    Raises:
        KedroCliError: If `mount_volumes` are provided but either `host_root`
            or `container_root` are missing.

    Returns:
        List of arguments for the docker command.
    """

    mount_volumes = mount_volumes or []
    required_args = required_args or []
    optional_args = optional_args or []
    user_args = user_args or []
    split_user_args = {ua.split("=", 1)[0] for ua in user_args}

    def _add_args(name_: str,
                  value_: str = None,
                  force_: bool = False) -> List[str]:
        """
        Add extra args to existing list of CLI args.
        Args:
            name_: Arg name to add.
            value_: Arg value to add, skipped if None.
            force_: Add the argument even if it's present in the current list of args.

        Returns:
            List containing the new args and (optionally) its value or an empty list
                if no values to be added.
        """
        if not force_ and name_ in split_user_args:
            return []
        return [name_] if value_ is None else [name_, value_]

    if mount_volumes:
        if not (host_root and container_root):
            raise KedroCliError("Both `host_root` and `container_root` must "
                                "be specified in `compose_docker_run_args` "
                                "call if `mount_volumes` are provided.")
        vol_gen = _list_docker_volumes(host_root, container_root,
                                       mount_volumes)
        combined_args = list(chain.from_iterable(vol_gen))
    else:
        combined_args = []
    for arg_name, arg_value in required_args:
        combined_args += _add_args(arg_name, arg_value, True)
    for arg_name, arg_value in optional_args:
        combined_args += _add_args(arg_name, arg_value)
    return combined_args + user_args
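`_list_docker_volumes` is assumed to yield `-v host:container` argument pairs for each volume name; a minimal sketch:

from pathlib import Path, PurePosixPath
from typing import Generator, Sequence, Tuple

def _list_docker_volumes(
    host_root: str, container_root: str, volumes: Sequence[str]
) -> Generator[Tuple[str, str], None, None]:
    for volume in volumes:
        host_path = Path(host_root).resolve() / volume
        container_path = PurePosixPath(container_root) / volume
        yield "-v", f"{host_path}:{container_path}"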
Example #25
def _assert_repo_name_ok(repo_name):
    if not re.match(r"^\w+(-*\w+)*$", repo_name):
        message = ("`{}` is not a valid repository name. It must contain "
                   "only word symbols and/or hyphens, must also start and "
                   "end with alphanumeric symbol.".format(repo_name))
        raise KedroCliError(message)
Example #26
def _check_module_path(ctx, param, value):  # pylint: disable=unused-argument
    if value and not re.match(r"^[\w.]+$", value):
        message = "The pipeline location you provided is not a valid Python module path"
        raise KedroCliError(message)
    return value