Пример #1
0
def convert_notebook(all_flag, overwrite_flag, filepath):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `src/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    # The project context is loaded before the arguments are validated, so
    # a failure to load it surfaces ahead of the usage message below.
    context = load_context(Path.cwd())

    if not (filepath or all_flag):
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    project_root = context.project_path
    # Every exported module lands in the same package-level "nodes" folder.
    nodes_dir = project_root / "src" / "kedro_demo_feb2020" / "nodes"

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        search_pattern = str(project_root / "**" / "*.ipynb")
        notebooks = sorted(map(Path, iglob(search_pattern, recursive=True)))
    else:
        notebooks = list(map(Path, filepath))

    # Output files are named after the notebook stem, so two notebooks with
    # the same stem would be written to the same target file.
    stem_counts = Counter(nb.stem for nb in notebooks)
    duplicated = [stem for stem, seen in stem_counts.items() if seen > 1]
    if duplicated:
        raise KedroCliError(
            "Found non-unique notebook names! "
            "Please rename the following: {}".format(", ".join(duplicated))
        )

    for notebook in notebooks:
        secho("Converting notebook '{}'...".format(str(notebook)))
        target = nodes_dir / "{}.py".format(notebook.stem)

        # Only ask for confirmation when a file is already present and the
        # overwrite flag was not given; a declined prompt skips the notebook.
        if target.is_file() and not (
            overwrite_flag
            or click.confirm(
                "Output file {} already exists. Overwrite?".format(str(target)),
                default=False,
            )
        ):
            continue

        export_nodes(notebook, target)

    secho("Done!")
Пример #2
0
    def test_export_nodes_nothing_to_write(self, project_path, nodes_path):
        """Check that no output file is written when nothing is exportable.

        The notebook fixture holds a code cell without tags and a non-code
        cell tagged ``node``. ``export_nodes`` is expected to raise a
        ``UserWarning`` matching "Skipping notebook" and to leave the
        output path non-existent.
        """
        nodes = json.dumps(
            {
                "cells": [
                    {
                        "cell_type": "code",
                        "source": "print('hello world')",
                        "metadata": {},
                    },
                    {
                        "cell_type": "text",
                        "source": "hello world",
                        "metadata": {"tags": ["node"]},
                    },
                ]
            }
        )
        # Use the real Jupyter extension (".ipynb"), matching the other
        # notebook fixtures in these tests.
        notebook_file = project_path / "notebook.ipynb"
        notebook_file.write_text(nodes)
        output_path = nodes_path / "{}.py".format(notebook_file.stem)

        # Keep only the call under test inside the ``warns`` block so the
        # expected warning can only come from ``export_nodes``.
        with warns(UserWarning, match="Skipping notebook"):
            export_nodes(notebook_file, output_path)

        assert not output_path.exists()
Пример #3
0
    def test_export_nodes_different_notebook_paths(self, project_path, nodes_path):
        """Export notebooks stored under two different directories.

        One notebook is written under ``project_path`` and one under
        ``nodes_path``; both exports must produce the tagged cell's source.
        """
        tagged_cell = {
            "cell_type": "code",
            "source": "print('hello world')",
            "metadata": {"tags": ["node"]},
        }
        payload = json.dumps({"cells": [tagged_cell]})
        expected = "print('hello world')\n"

        # (notebook location, exported module location) pairs.
        jobs = [
            (project_path / "notebook1.ipynb", nodes_path / "notebook1.py"),
            (nodes_path / "notebook2.ipynb", nodes_path / "notebook2.py"),
        ]

        # Write every fixture first, then export, then verify.
        for source, _ in jobs:
            source.write_text(payload)

        for source, target in jobs:
            export_nodes(source, target)

        for _, target in jobs:
            assert target.read_text() == expected
Пример #4
0
    def test_export_nodes(self, project_path, nodes_path):
        """Check that only cells tagged ``node`` are exported, in order.

        The fixture has two tagged code cells and one untagged code cell;
        the output file must contain the two tagged sources, one per line.
        """

        def code_cell(source, tags=None):
            # Build a notebook code cell; untagged cells get empty metadata.
            metadata = {} if tags is None else {"tags": tags}
            return {"cell_type": "code", "source": source, "metadata": metadata}

        cells = [
            code_cell("print('hello world')", ["node"]),
            code_cell("print(10+5)", ["node"]),
            code_cell("a = 10"),
        ]
        notebook = project_path / "notebook.ipynb"
        notebook.write_text(json.dumps({"cells": cells}))

        target = nodes_path / "notebook.py"
        export_nodes(notebook, target)

        assert target.is_file()
        assert target.read_text() == "print('hello world')\nprint(10+5)\n"
Пример #5
0
    def test_export_nodes_json_error(self, nodes_path):
        """Check that a plain-text file is rejected with ``KedroCliError``.

        The input file contains the text "original" rather than notebook
        JSON; the raised error must mention that it is not a notebook.
        """
        not_a_notebook = nodes_path / "notebook.txt"
        not_a_notebook.touch()
        not_a_notebook.write_text("original")
        # Same directory and stem as the input, with a ".py" suffix.
        target = not_a_notebook.with_suffix(".py")

        with raises(
            KedroCliError, match="Provided filepath is not a Jupyter notebook"
        ):
            export_nodes(not_a_notebook, target)
Пример #6
0
    def test_export_nodes_overwrite(self, project_path, nodes_path):
        """Check that an existing output file is replaced by the export.

        A ``notebook.py`` file holding "original" is created first; after
        ``export_nodes`` runs against a notebook with one tagged code cell,
        that same file must hold only the cell's source.
        """
        existing_nodes = nodes_path / "notebook.py"
        # write_text creates the file, so no separate touch() is needed.
        existing_nodes.write_text("original")

        nodes = json.dumps(
            {
                "cells": [
                    {
                        "cell_type": "code",
                        "source": "print('hello world')",
                        "metadata": {"tags": ["node"]},
                    }
                ]
            }
        )
        # Use the real Jupyter extension (".ipynb"), matching the other
        # notebook fixtures in these tests.
        notebook_file = project_path / "notebook.ipynb"
        notebook_file.write_text(nodes)

        output_path = nodes_path / "{}.py".format(notebook_file.stem)
        # Guard the premise of the test: the export target must be the
        # pre-existing file, otherwise nothing is actually overwritten.
        assert output_path == existing_nodes

        export_nodes(notebook_file, output_path)

        assert output_path.is_file()
        assert output_path.read_text() == "print('hello world')\n"