Exemplo n.º 1
0
    def test_pull_config_missing(self, fake_kedro_cli, dummy_project, env, alias):
        """
        Pull a valid local wheel file whose `config` directory is missing.

        The pipeline config directory is removed before packaging; after the
        pull, the source and test files must exist while no config directory
        is created at the destination.
        """
        # pylint: disable=too-many-locals
        src_dir = dummy_project / "src"
        conf_dir = dummy_project / "conf"

        self.call_pipeline_create(fake_kedro_cli)
        # Drop the pipeline config before the wheel is built.
        original_config = conf_dir / "base" / "pipelines" / PIPELINE_NAME
        shutil.rmtree(original_config)
        assert not original_config.exists()
        self.call_pipeline_package(fake_kedro_cli)
        self.call_pipeline_delete(fake_kedro_cli)

        # The pipeline must really be gone before it is pulled from the wheel.
        for removed_dir in (
            src_dir / PACKAGE_NAME / "pipelines" / PIPELINE_NAME,
            src_dir / "tests" / "pipelines" / PIPELINE_NAME,
        ):
            assert not removed_dir.exists()

        wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        wheel_file = src_dir / "dist" / wheel_name
        assert wheel_file.is_file()

        options = []
        if env:
            options += ["-e", env]
        if alias:
            options += ["--alias", alias]
        pull_args = ["pipeline", "pull", str(wheel_file), *options]
        result = CliRunner().invoke(fake_kedro_cli.cli, pull_args)
        assert result.exit_code == 0

        # The alias (when given) names the pulled pipeline; env picks the conf dir.
        pulled_name = alias or PIPELINE_NAME
        pulled_env = env or "base"
        source_dest = src_dir / PACKAGE_NAME / "pipelines" / pulled_name
        config_dest = conf_dir / pulled_env / "pipelines" / pulled_name
        test_dest = src_dir / "tests" / "pipelines" / pulled_name

        self.assert_package_files_exist(source_dest)
        assert not config_dest.exists()
        pulled_test_files = {entry.name for entry in test_dest.iterdir()}
        assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
Exemplo n.º 2
0
    def test_package_pipeline_with_deep_nested_parameters(
            self, fake_repo_path, fake_project_cli, fake_metadata):
        """Package the `retail` pipeline with parameter files at several depths.

        Extra parameter files are created under ``conf/base/parameters`` in
        nested directories named after the pipeline (or files named
        ``retail.yml``), then the pipeline is packaged and the wheel is
        checked to contain each of them under ``retail/config/parameters``.
        """
        CliRunner().invoke(fake_project_cli, ["pipeline", "create", "retail"],
                           obj=fake_metadata)

        parameters_dir = fake_repo_path / "conf" / "base" / "parameters"
        nested_param_files = (
            parameters_dir / "deep" / "retail" / "params1.yml",
            parameters_dir / "retail" / "deep" / "params1.yml",
            parameters_dir / "deep" / "retail.yml",
            parameters_dir / "a" / "b" / "c" / "d" / "retail" / "params3.yml",
        )
        for param_file in nested_param_files:
            # Some parent directories are shared, hence exist_ok=True.
            param_file.parent.mkdir(parents=True, exist_ok=True)
            param_file.touch()

        result = CliRunner().invoke(fake_project_cli,
                                    ["pipeline", "package", "retail"],
                                    obj=fake_metadata)

        assert result.exit_code == 0
        assert "Pipeline `retail` packaged!" in result.output

        wheel_location = fake_repo_path / "src" / "dist"
        assert f"Location: {wheel_location}" in result.output

        wheel_name = _get_wheel_name(name="retail", version="0.1")
        wheel_file = wheel_location / wheel_name
        assert wheel_file.is_file()
        assert len(list(wheel_location.iterdir())) == 1

        # Close the archive deterministically instead of leaking the handle.
        with ZipFile(str(wheel_file)) as archive:
            wheel_contents = set(archive.namelist())
        assert "retail/config/parameters/deep/retail/params1.yml" in wheel_contents
        assert "retail/config/parameters/retail/deep/params1.yml" in wheel_contents
        assert "retail/config/parameters/retail.yml" in wheel_contents
        assert "retail/config/parameters/deep/retail.yml" in wheel_contents
        assert "retail/config/parameters/a/b/c/d/retail/params3.yml" in wheel_contents
Exemplo n.º 3
0
    def test_pipeline_pull_all(  # pylint: disable=too-many-locals
            self, fake_repo_path, fake_project_cli, fake_metadata, mocker):
        """Run `pipeline pull --all` against a manifest in `pyproject.toml`.

        Three pipelines are created, packaged and deleted, and a
        `[tool.kedro.pipeline.pull]` section listing their wheel files is
        appended to `pyproject.toml`. The command must succeed and
        `_pull_package` must be called once per manifest entry with the
        options declared for that entry.
        """
        # pylint: disable=import-outside-toplevel
        from kedro.framework.cli import pipeline

        def wheel_path(name):
            """Return the `src/dist` location of the wheel built for `name`."""
            return (fake_repo_path / "src" / "dist" /
                    _get_wheel_name(name=name, version="0.1"))

        spy = mocker.spy(pipeline, "_pull_package")
        pyproject_toml = fake_repo_path / "pyproject.toml"
        project_toml_str = textwrap.dedent(f"""
            [tool.kedro.pipeline.pull]
            "{wheel_path("first")}" = {{alias = "dp"}}
            "{wheel_path("second")}" = {{alias = "ds", env = "local"}}
            "{wheel_path("third")}" = {{}}
            """)

        with pyproject_toml.open(mode="a") as file:
            file.write(project_toml_str)

        for name in ("first", "second", "third"):
            call_pipeline_create(fake_project_cli,
                                 fake_metadata,
                                 pipeline_name=name)
            call_pipeline_package(fake_project_cli,
                                  fake_metadata,
                                  pipeline_name=name)
            call_pipeline_delete(fake_project_cli,
                                 fake_metadata,
                                 pipeline_name=name)

        result = CliRunner().invoke(fake_project_cli,
                                    ["pipeline", "pull", "--all"],
                                    obj=fake_metadata)

        assert result.exit_code == 0
        assert "Pipelines pulled and unpacked!" in result.output
        assert spy.call_count == 3

        # Re-read the manifest text so the expected calls come from what was
        # actually written to pyproject.toml.
        build_config = toml.loads(project_toml_str)
        pull_manifest = build_config["tool"]["kedro"]["pipeline"]["pull"]
        for wheel_location, pull_specs in pull_manifest.items():
            expected_call = mocker.call(wheel_location, fake_metadata,
                                        **pull_specs)
            assert expected_call in spy.call_args_list
    def assert_wheel_contents_correct(
        self, wheel_location, package_name=PIPELINE_NAME, version="0.1"
    ):
        """Assert that `wheel_location` holds exactly one, correctly filled wheel.

        Args:
            wheel_location: Directory expected to contain the wheel file.
            package_name: Name of the packaged pipeline; used both for the
                wheel file name and for the paths inside the archive.
            version: Version used to build the expected wheel file name.

        Raises:
            AssertionError: If the wheel file is missing, the directory holds
                more than one entry, or an expected file is absent from the
                archive.
        """
        wheel_name = _get_wheel_name(name=package_name, version=version)
        wheel_file = wheel_location / wheel_name
        assert wheel_file.is_file()
        assert len(list(wheel_location.iterdir())) == 1

        # Close the archive deterministically instead of leaking the handle.
        with ZipFile(str(wheel_file)) as archive:
            wheel_contents = set(archive.namelist())
        expected_files = {
            f"{package_name}/__init__.py",
            f"{package_name}/README.md",
            f"{package_name}/nodes.py",
            f"{package_name}/pipeline.py",
            f"{package_name}/config/parameters/{package_name}.yml",
            "tests/__init__.py",
            "tests/test_pipeline.py",
        }
        # Subset check: the archive may hold additional files.
        assert expected_files <= wheel_contents
Exemplo n.º 5
0
    def test_pipeline_alias_refactors_imports(
        self, fake_project_cli, fake_package_path, fake_repo_path, fake_metadata
    ):
        """Check that an import of the pipeline package follows the alias.

        An `import <package>.pipelines.<PIPELINE_NAME>.nodes` statement is
        appended to the pipeline's `pipeline.py`. The pipeline is packaged
        under one alias, then pulled twice: once as-is and once with a second
        alias. Each pulled `pipeline.py` must contain the import statement
        rewritten to the name it was pulled under.
        """
        call_pipeline_create(fake_project_cli, fake_metadata)
        pipeline_file = fake_package_path / "pipelines" / PIPELINE_NAME / "pipeline.py"
        import_stmt = (
            f"import {fake_metadata.package_name}.pipelines.{PIPELINE_NAME}.nodes"
        )
        with pipeline_file.open("a") as f:
            f.write(import_stmt)

        package_alias = "alpha"
        pull_alias = "beta"

        call_pipeline_package(
            cli=fake_project_cli, metadata=fake_metadata, alias=package_alias
        )

        wheel_file = (
            fake_repo_path
            / "src"
            / "dist"
            / _get_wheel_name(name=package_alias, version="0.1")
        )
        # First pull keeps the packaged alias, the second one overrides it.
        for pull_options in ([], ["--alias", pull_alias]):
            result = CliRunner().invoke(
                fake_project_cli,
                ["pipeline", "pull", str(wheel_file), *pull_options],
                obj=fake_metadata,
            )
            # Surface a failed pull here, with the CLI output, rather than as
            # a missing-file error when the pulled module is read below.
            assert result.exit_code == 0, result.output

        for alias in (package_alias, pull_alias):
            path = fake_package_path / "pipelines" / alias / "pipeline.py"
            file_content = path.read_text()
            expected_stmt = (
                f"import {fake_metadata.package_name}.pipelines.{alias}.nodes"
            )
            assert expected_stmt in file_content
Exemplo n.º 6
0
    def test_pull_whl_fs_args(self, fake_project_cli, fake_repo_path, mocker,
                              tmp_path, fake_metadata):
        """Pull a wheel file while passing custom filesystem arguments.

        The arguments are written to a YAML file handed to `--fs-args`, and
        the patched `fsspec.filesystem` must receive them as keyword
        arguments exactly once.
        """
        for prepare in (call_pipeline_create, call_pipeline_package,
                        call_pipeline_delete):
            prepare(fake_project_cli, fake_metadata)

        fs_args = {"fs_arg_1": 1, "fs_arg_2": {"fs_arg_2_nested_1": 2}}
        fs_args_config = tmp_path / "fs_args_config.yml"
        with fs_args_config.open(mode="w") as config_stream:
            yaml.dump(fs_args, config_stream)

        wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        wheel_file = fake_repo_path / "src" / "dist" / wheel_name

        # Patch only after packaging, right before the pull under test.
        mocked_filesystem = mocker.patch("fsspec.filesystem")
        pull_args = [
            "pipeline", "pull", str(wheel_file),
            "--fs-args", str(fs_args_config),
        ]
        CliRunner().invoke(fake_project_cli, pull_args)

        mocked_filesystem.assert_called_once_with(
            "file", fs_arg_1=1, fs_arg_2={"fs_arg_2_nested_1": 2})
Exemplo n.º 7
0
    def test_pull_two_dist_info(self, fake_kedro_cli, dummy_project, mocker,
                                tmp_path):
        """
        Pulling a wheel file must fail when more than one dist-info directory
        is found.

        An extra ``dummy.dist-info`` directory is created in ``tmp_path``,
        which is also what the patched ``tempfile.TemporaryDirectory`` returns.
        """
        self.call_pipeline_create(fake_kedro_cli)
        self.call_pipeline_package(fake_kedro_cli)

        wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        wheel_file = dummy_project / "src" / "dist" / wheel_name
        assert wheel_file.is_file()

        mocker.patch(
            "kedro.framework.cli.pipeline.tempfile.TemporaryDirectory",
            return_value=tmp_path,
        )
        extra_dist_info = tmp_path / "dummy.dist-info"
        extra_dist_info.mkdir()

        pull_args = ["pipeline", "pull", str(wheel_file)]
        result = CliRunner().invoke(fake_kedro_cli.cli, pull_args)

        # Any non-zero exit code counts as the expected failure.
        assert result.exit_code != 0
        assert "Error: More than 1 or no dist-info files found" in result.output
    def test_package_pipeline_no_config(
        self, fake_repo_path, fake_project_cli, fake_metadata
    ):
        """Package a pipeline that was created with `--skip-config`.

        Both CLI calls must succeed, the wheel must be the only entry in
        `src/dist`, and it must contain the source and test files but no
        parameters file for the pipeline.
        """
        result = CliRunner().invoke(
            fake_project_cli.cli,
            ["pipeline", "create", PIPELINE_NAME, "--skip-config"],
            obj=fake_metadata,
        )
        assert result.exit_code == 0
        result = CliRunner().invoke(
            fake_project_cli.cli,
            ["pipeline", "package", PIPELINE_NAME],
            obj=fake_metadata,
        )

        assert result.exit_code == 0
        assert f"Pipeline `{PIPELINE_NAME}` packaged!" in result.output

        wheel_location = fake_repo_path / "src" / "dist"
        assert f"Location: {wheel_location}" in result.output

        # the wheel contents are slightly different (config shouldn't be included),
        # which is why we can't call self.assert_wheel_contents_correct here
        wheel_file = wheel_location / _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        assert wheel_file.is_file()
        assert len(list(wheel_location.iterdir())) == 1

        # Close the archive deterministically instead of leaking the handle.
        with ZipFile(str(wheel_file)) as archive:
            wheel_contents = set(archive.namelist())
        expected_files = {
            f"{PIPELINE_NAME}/__init__.py",
            f"{PIPELINE_NAME}/README.md",
            f"{PIPELINE_NAME}/nodes.py",
            f"{PIPELINE_NAME}/pipeline.py",
            "tests/__init__.py",
            "tests/test_pipeline.py",
        }
        assert expected_files <= wheel_contents
        assert f"{PIPELINE_NAME}/config/parameters.yml" not in wheel_contents
        # Also check the path that assert_wheel_contents_correct expects the
        # parameters file to be packaged under when config is present.
        assert (
            f"{PIPELINE_NAME}/config/parameters/{PIPELINE_NAME}.yml"
            not in wheel_contents
        )
Exemplo n.º 9
0
    def test_pull_local_whl_compare(self, fake_kedro_cli, dummy_project, env, alias):
        """
        Package a pipeline under a different name, pull the resulting wheel
        file locally, and check that `filecmp.dircmp` reports no differing
        files between the original modular pipeline and the unpacked copy.
        """
        # pylint: disable=too-many-locals
        packaged_name = "another_pipeline"
        src_dir = dummy_project / "src"
        conf_dir = dummy_project / "conf"

        self.call_pipeline_create(fake_kedro_cli)
        self.call_pipeline_package(fake_kedro_cli, packaged_name)

        wheel_name = _get_wheel_name(name=packaged_name, version="0.1")
        wheel_file = src_dir / "dist" / wheel_name
        assert wheel_file.is_file()

        options = []
        if env:
            options += ["-e", env]
        if alias:
            options += ["--alias", alias]
        pull_args = ["pipeline", "pull", str(wheel_file), *options]
        result = CliRunner().invoke(fake_kedro_cli.cli, pull_args)
        assert result.exit_code == 0

        # The alias (when given) wins over the name the wheel was packaged as.
        pulled_name = alias or packaged_name
        pulled_env = env or "base"

        # (original directory, pulled directory) pairs to compare.
        compared_dirs = (
            (
                src_dir / PACKAGE_NAME / "pipelines" / PIPELINE_NAME,
                src_dir / PACKAGE_NAME / "pipelines" / pulled_name,
            ),
            (
                conf_dir / "base" / "pipelines" / PIPELINE_NAME,
                conf_dir / pulled_env / "pipelines" / pulled_name,
            ),
            (
                src_dir / "tests" / "pipelines" / PIPELINE_NAME,
                src_dir / "tests" / "pipelines" / pulled_name,
            ),
        )
        for original_dir, pulled_dir in compared_dirs:
            assert not filecmp.dircmp(original_dir, pulled_dir).diff_files
Exemplo n.º 10
0
    def test_pipeline_package_version(self, fake_repo_path, fake_package_path,
                                      fake_project_cli, fake_metadata):
        """Package an existing pipeline and check the wheel carries its version.

        The wheel file name is expected to be built from the pipeline name and
        the test version, and to be the only entry in `src/dist`.
        """
        pipeline_to_package = "data_engineering"
        # the test version value is set separately in
        # features/steps/test_starter/<repo>/src/<package>/pipelines/data_engineering/__init__.py
        expected_version = "4.20.69"

        assert (fake_package_path / "pipelines" / pipeline_to_package).is_dir()

        package_args = ["pipeline", "package", pipeline_to_package]
        result = CliRunner().invoke(fake_project_cli, package_args,
                                    obj=fake_metadata)
        assert result.exit_code == 0

        # test for actual version
        dist_dir = fake_repo_path / "src" / "dist"
        expected_wheel = dist_dir / _get_wheel_name(name=pipeline_to_package,
                                                    version=expected_version)

        assert expected_wheel.is_file()
        dist_entries = list(dist_dir.iterdir())
        assert len(dist_entries) == 1
Exemplo n.º 11
0
    def test_pipeline_package_overwrites_wheel(self, fake_kedro_cli, tmp_path):
        """Package into a destination that already holds a file with the wheel's name.

        An empty file with the expected wheel name is placed in the
        destination first. Packaging must succeed, print both the overwrite
        warning and the success message, and leave a correctly filled wheel.
        """
        destination = (tmp_path / "in" / "here").resolve()
        destination.mkdir(parents=True)
        # Empty placeholder that the package command is expected to overwrite.
        existing_wheel = destination / _get_wheel_name(
            name=PIPELINE_NAME, version="0.1"
        )
        existing_wheel.touch()

        runner_cli = fake_kedro_cli.cli
        create_result = CliRunner().invoke(
            runner_cli, ["pipeline", "create", PIPELINE_NAME]
        )
        assert create_result.exit_code == 0

        package_args = [
            "pipeline",
            "package",
            PIPELINE_NAME,
            "--destination",
            str(destination),
        ]
        package_result = CliRunner().invoke(runner_cli, package_args)
        assert package_result.exit_code == 0

        expected_messages = (
            f"Package file {existing_wheel} will be overwritten!",
            f"Pipeline `{PIPELINE_NAME}` packaged! Location: {destination}",
        )
        for message in expected_messages:
            assert message in package_result.output

        self.assert_wheel_contents_correct(wheel_location=destination)
Exemplo n.º 12
0
    def test_pull_from_pypi(
        self,
        fake_project_cli,
        fake_repo_path,
        mocker,
        tmp_path,
        fake_package_path,
        env,
        alias,
        fake_metadata,
    ):
        """
        Pull a pipeline by package name, as if it came from pypi.

        The `pip download` call is mocked; the wheel is instead packaged into
        `tmp_path`, which the patched `tempfile.TemporaryDirectory` returns.
        """
        # pylint: disable=too-many-locals
        cli = fake_project_cli.cli
        tests_root = fake_repo_path / "src" / "tests" / "pipelines"
        pipelines_root = fake_package_path / "pipelines"

        self.call_pipeline_create(cli, fake_metadata)
        # We mock the `pip download` call, and manually create a package wheel file
        # to simulate the pypi scenario instead
        self.call_pipeline_package(cli, fake_metadata, destination=tmp_path)
        packaged_wheel = tmp_path / _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        assert packaged_wheel.is_file()
        self.call_pipeline_delete(cli, fake_metadata)

        # Make sure the files actually deleted before pulling from pypi.
        deleted_paths = (
            pipelines_root / PIPELINE_NAME,
            tests_root / PIPELINE_NAME,
            fake_repo_path / CONF_ROOT / "base" / "parameters" / f"{PIPELINE_NAME}.yml",
        )
        for deleted in deleted_paths:
            assert not deleted.exists()

        python_call_mock = mocker.patch("kedro.framework.cli.pipeline.python_call")
        mocker.patch(
            "kedro.framework.cli.pipeline.tempfile.TemporaryDirectory",
            return_value=tmp_path,
        )

        options = []
        if env:
            options += ["-e", env]
        if alias:
            options += ["--alias", alias]
        pull_args = ["pipeline", "pull", PIPELINE_NAME, *options]
        result = CliRunner().invoke(cli, pull_args, obj=fake_metadata)
        assert result.exit_code == 0

        expected_pip_args = [
            "download",
            "--no-deps",
            "--dest",
            str(tmp_path),
            PIPELINE_NAME,
        ]
        python_call_mock.assert_called_once_with("pip", expected_pip_args)

        # The alias (when given) names the pulled pipeline; env picks the conf dir.
        pulled_name = alias or PIPELINE_NAME
        pulled_env = env or "base"
        source_dest = pipelines_root / pulled_name
        test_dest = tests_root / pulled_name
        dest_params_config = (
            fake_repo_path / CONF_ROOT / pulled_env / "parameters" / f"{pulled_name}.yml"
        )

        self.assert_package_files_exist(source_dest)
        assert dest_params_config.is_file()
        pulled_test_files = {entry.name for entry in test_dest.iterdir()}
        assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
Exemplo n.º 13
0
    def test_pull_config_missing(
        self,
        fake_project_cli,
        fake_repo_path,
        fake_package_path,
        env,
        alias,
        fake_metadata,
    ):
        """
        Pull a valid local wheel file whose `config` directory is missing.

        The pipeline's parameters file is deleted before packaging; after the
        pull, the source and test files must exist while no parameters file
        is created at the destination.
        """
        # pylint: disable=too-many-locals
        cli = fake_project_cli.cli
        conf_root = fake_repo_path / CONF_ROOT
        tests_root = fake_repo_path / "src" / "tests" / "pipelines"
        pipelines_root = fake_package_path / "pipelines"

        self.call_pipeline_create(cli, fake_metadata)
        # Drop the parameters file before the wheel is built.
        (conf_root / "base" / "parameters" / f"{PIPELINE_NAME}.yml").unlink()
        self.call_pipeline_package(cli, fake_metadata)
        self.call_pipeline_delete(cli, fake_metadata)

        # Make sure the files actually deleted before pulling from the wheel file.
        for deleted in (pipelines_root / PIPELINE_NAME, tests_root / PIPELINE_NAME):
            assert not deleted.exists()

        wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
        wheel_file = fake_repo_path / "src" / "dist" / wheel_name
        assert wheel_file.is_file()

        options = []
        if env:
            options += ["-e", env]
        if alias:
            options += ["--alias", alias]
        pull_args = ["pipeline", "pull", str(wheel_file), *options]
        result = CliRunner().invoke(cli, pull_args, obj=fake_metadata)
        assert result.exit_code == 0

        # The alias (when given) names the pulled pipeline; env picks the conf dir.
        pulled_name = alias or PIPELINE_NAME
        pulled_env = env or "base"
        source_dest = pipelines_root / pulled_name
        test_dest = tests_root / pulled_name
        dest_params_config = conf_root / pulled_env / "parameters" / f"{pulled_name}.yml"

        self.assert_package_files_exist(source_dest)
        assert not dest_params_config.exists()
        pulled_test_files = {entry.name for entry in test_dest.iterdir()}
        assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
Exemplo n.º 14
0
    def test_pull_local_whl_compare(
        self,
        fake_project_cli,
        fake_repo_path,
        fake_package_path,
        env,
        alias,
        fake_metadata,
    ):
        """Package a pipeline under an alias, pull the wheel file locally and
        compare the unpacked files with the original modular pipeline.

        Source and test directories are compared with `filecmp.dircmp`; the
        parameters files are compared byte for byte.
        """
        # pylint: disable=too-many-locals
        packaged_name = "another_pipeline"
        conf_root = fake_repo_path / settings.CONF_ROOT
        tests_root = fake_repo_path / "src" / "tests" / "pipelines"
        pipelines_root = fake_package_path / "pipelines"

        call_pipeline_create(fake_project_cli, fake_metadata)
        call_pipeline_package(fake_project_cli, fake_metadata, alias=packaged_name)

        wheel_name = _get_wheel_name(name=packaged_name, version="0.1")
        wheel_file = fake_repo_path / "src" / "dist" / wheel_name
        assert wheel_file.is_file()

        options = []
        if env:
            options += ["-e", env]
        if alias:
            options += ["--alias", alias]
        pull_args = ["pipeline", "pull", str(wheel_file), *options]
        result = CliRunner().invoke(fake_project_cli, pull_args, obj=fake_metadata)
        assert result.exit_code == 0, result.output
        assert "pulled and unpacked" in result.output

        # The pull alias (when given) wins over the alias used for packaging.
        pulled_name = alias or packaged_name
        pulled_env = env or "base"

        # (original directory, pulled directory) pairs to compare.
        compared_dirs = (
            (pipelines_root / PIPELINE_NAME, pipelines_root / pulled_name),
            (tests_root / PIPELINE_NAME, tests_root / pulled_name),
        )
        for original_dir, pulled_dir in compared_dirs:
            assert not filecmp.dircmp(original_dir, pulled_dir).diff_files

        source_params_config = (
            conf_root / "base" / "parameters" / f"{PIPELINE_NAME}.yml"
        )
        dest_params_config = (
            conf_root / pulled_env / "parameters" / f"{pulled_name}.yml"
        )
        assert source_params_config.read_bytes() == dest_params_config.read_bytes()