def convert_notebook(all_flag, overwrite_flag, filepath):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:

    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `src/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!

    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.

    """
    context = load_context(Path.cwd())

    # Nothing to do unless explicit paths or the "all" switch were supplied.
    if not (filepath or all_flag):
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    project_root = context.project_path
    package_name = "kedro_demo_feb2020"
    nodes_dir = project_root / "src" / package_name / "nodes"

    if all_flag:
        # Use the glob module rather than Path.glob: it skips hidden
        # directories, which keeps notebook checkpoints out of the results.
        glob_pattern = str(project_root / "**" / "*.ipynb")
        notebooks = sorted(
            Path(match) for match in iglob(glob_pattern, recursive=True)
        )
    else:
        notebooks = [Path(given) for given in filepath]

    # Output files are named after the notebook stem, so stems must not clash.
    stem_counts = Counter(nb.stem for nb in notebooks)
    duplicated = [stem for stem, seen in stem_counts.items() if seen > 1]
    if duplicated:
        raise KedroCliError(
            "Found non-unique notebook names! "
            "Please rename the following: {}".format(", ".join(duplicated))
        )

    for notebook in notebooks:
        secho("Converting notebook '{}'...".format(str(notebook)))
        output_path = nodes_dir / "{}.py".format(notebook.stem)

        # Only prompt when a file would be clobbered and overwriting was not
        # requested up front; a negative answer skips this notebook.
        if output_path.is_file() and not overwrite_flag:
            confirmed = click.confirm(
                "Output file {} already exists. Overwrite?".format(str(output_path)),
                default=False,
            )
            if not confirmed:
                continue

        export_nodes(notebook, output_path)

    secho("Done!")
def test_export_nodes_nothing_to_write(self, project_path, nodes_path):
    """Check that a notebook with nothing to export produces no output file.

    The notebook holds an untagged code cell and a ``node``-tagged cell
    whose type is ``text``. ``export_nodes`` is expected to emit a
    ``UserWarning`` matching "Skipping notebook" and to leave the output
    path uncreated.
    """
    nodes = json.dumps(
        {
            "cells": [
                {
                    "cell_type": "code",
                    "source": "print('hello world')",
                    "metadata": {},
                },
                {
                    "cell_type": "text",
                    "source": "hello world",
                    "metadata": {"tags": ["node"]},
                },
            ]
        }
    )
    notebook_file = project_path / "notebook.ipynb"
    notebook_file.write_text(nodes)
    output_path = nodes_path / "{}.py".format(notebook_file.stem)

    # Keep only the call under test inside the warning check.
    with warns(UserWarning, match="Skipping notebook"):
        export_nodes(notebook_file, output_path)

    assert not output_path.exists()
def test_export_nodes_different_notebook_paths(self, project_path, nodes_path):
    """Export the same tagged notebook from two different directories.

    One notebook lives under ``project_path`` and the other under
    ``nodes_path``; both exports must contain the single tagged cell.
    """
    tagged_cell = {
        "cell_type": "code",
        "source": "print('hello world')",
        "metadata": {"tags": ["node"]},
    }
    nodes = json.dumps({"cells": [tagged_cell]})

    # (notebook location, expected output file) for each case.
    cases = [
        (project_path / "notebook1.ipynb", nodes_path / "notebook1.py"),
        (nodes_path / "notebook2.ipynb", nodes_path / "notebook2.py"),
    ]

    # Write every notebook first, then export, then verify.
    for notebook_file, _ in cases:
        notebook_file.write_text(nodes)

    for notebook_file, output_path in cases:
        export_nodes(notebook_file, output_path)

    for _, output_path in cases:
        assert output_path.read_text() == "print('hello world')\n"
def test_export_nodes(self, project_path, nodes_path):
    """Export a notebook with two tagged cells and one untagged cell.

    Only the sources of the ``node``-tagged cells are expected in the
    output file, in notebook order, each followed by a newline.
    """
    node_tag = {"tags": ["node"]}
    cells = [
        {"cell_type": "code", "source": "print('hello world')", "metadata": node_tag},
        {"cell_type": "code", "source": "print(10+5)", "metadata": node_tag},
        # Untagged cell: must not appear in the exported file.
        {"cell_type": "code", "source": "a = 10", "metadata": {}},
    ]
    notebook_file = project_path / "notebook.ipynb"
    notebook_file.write_text(json.dumps({"cells": cells}))

    output_path = nodes_path / (notebook_file.stem + ".py")
    export_nodes(notebook_file, output_path)

    assert output_path.is_file()
    exported = output_path.read_text()
    assert exported == "print('hello world')\nprint(10+5)\n"
def test_export_nodes_json_error(self, nodes_path):
    """Check that exporting a non-notebook file raises ``KedroCliError``.

    The input is a plain text file whose content is not JSON; the error
    message must match "Provided filepath is not a Jupyter notebook".
    """
    not_a_notebook = nodes_path / "notebook.txt"
    not_a_notebook.touch()
    not_a_notebook.write_text("original")

    # Same directory and stem as the input, with a ".py" extension.
    target = not_a_notebook.with_suffix(".py")

    with raises(
        KedroCliError, match="Provided filepath is not a Jupyter notebook"
    ):
        export_nodes(not_a_notebook, target)
def test_export_nodes_overwrite(self, project_path, nodes_path):
    """Check that exporting replaces the content of an existing output file.

    ``notebook.py`` is pre-populated with placeholder text; after the
    export it must contain only the source of the ``node``-tagged cell.
    """
    # write_text creates the file, so no separate touch() is needed.
    existing_nodes = nodes_path / "notebook.py"
    existing_nodes.write_text("original")

    nodes = json.dumps(
        {
            "cells": [
                {
                    "cell_type": "code",
                    "source": "print('hello world')",
                    "metadata": {"tags": ["node"]},
                }
            ]
        }
    )
    notebook_file = project_path / "notebook.ipynb"
    notebook_file.write_text(nodes)

    # Resolves to the pre-existing file created above.
    output_path = nodes_path / "{}.py".format(notebook_file.stem)
    export_nodes(notebook_file, output_path)

    assert output_path.is_file()
    assert output_path.read_text() == "print('hello world')\n"