def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL."""
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug)
    instance.logger = request.log if hasattr(request, "log") else logger
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)
    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfputils.compile_pipeline(script_path, pipeline_name)
    return {"pipeline_package_path": os.path.relpath(package_path),
            "pipeline_metadata": instance.pipeline_metadata}
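# Illustrative usage sketch, not part of the original module: the RPC handler
# above only expects the request object to (optionally) expose a `log`
# attribute, so it can be driven locally with a simple namespace. The
# `_example_compile_call` name and the notebook path are assumptions made for
# this example.
def _example_compile_call():
    import logging
    from types import SimpleNamespace

    # Stand-in for the RPC request; the handler falls back to the module
    # logger when no `log` attribute is present.
    request = SimpleNamespace(log=logging.getLogger("kale.rpc"))
    result = compile_notebook(request, "example.ipynb")
    # The returned dict holds the relative path to the compiled package and
    # the notebook's pipeline metadata.
    return result["pipeline_package_path"]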
def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC,
                                     formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb', type=str,
                               help='Path to source JupyterNotebook',
                               required=True)
    # use store_const instead of store_true because we want None instead of
    # False in case the flag is missing
    general_group.add_argument('--upload_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--run_pipeline', action='store_const',
                               const=True)
    general_group.add_argument('--debug', action='store_true')

    metadata_group = parser.add_argument_group('Notebook Metadata Overrides',
                                               METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name', type=str,
                                help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name', type=str,
                                help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description', type=str,
                                help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image', type=str,
                                help='Docker base image used to build the'
                                     ' pipeline steps')
    metadata_group.add_argument('--kfp_host', type=str,
                                help='KFP endpoint. Provide address as'
                                     ' <host>:<port>.')

    args = parser.parse_args()

    # get the notebook metadata args group
    metadata_overrides_group = next(
        filter(lambda x: x.title == 'Notebook Metadata Overrides',
               parser._action_groups))
    # get the single args of that group
    metadata_overrides_group_dict = {
        a.dest: getattr(args, a.dest, None)
        for a in metadata_overrides_group._group_actions}

    kale = Kale(
        source_notebook_path=args.nb,
        notebook_metadata_overrides=metadata_overrides_group_dict,
        debug=args.debug
    )
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters)
    # compile the pipeline to a KFP tar package
    pipeline_package_path = kfp_utils.compile_pipeline(
        script_path, kale.pipeline_metadata['pipeline_name'])

    if args.upload_pipeline:
        kfp_utils.upload_pipeline(
            pipeline_package_path=pipeline_package_path,
            pipeline_name=kale.pipeline_metadata['pipeline_name'],
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
    if args.run_pipeline:
        kfp_utils.run_pipeline(
            run_name=kale.pipeline_metadata['pipeline_name'] + '_run',
            experiment_name=kale.pipeline_metadata['experiment_name'],
            pipeline_package_path=pipeline_package_path,
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
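# Example invocation (illustrative; assumes this entry point is exposed as a
# `kale` console script, which is an assumption about packaging). Only `--nb`
# is required; the flags in the "Notebook Metadata Overrides" group override
# the corresponding values read from the notebook's Kale metadata:
#
#     kale --nb my_notebook.ipynb \
#          --pipeline_name my-pipeline \
#          --experiment_name my-experiment \
#          --kfp_host localhost:8080 \
#          --upload_pipeline --run_pipeline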
def compile_notebook(source_notebook_path, notebook_metadata_overrides=None,
                     debug=False, auto_snapshot=False):
    """Compile the notebook to KFP DSL."""
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug,
                    auto_snapshot)
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)
    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfp_utils.compile_pipeline(script_path, pipeline_name)
    return {
        "pipeline_package_path": package_path,
        "pipeline_metadata": instance.pipeline_metadata
    }
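# Illustrative sketch, not part of the original module: compiling a notebook
# programmatically and then uploading the resulting package with the same
# kfp_utils helpers the CLI uses. The `_example_compile_and_upload` name, the
# notebook path, and the override values are assumptions made for the example.
def _example_compile_and_upload():
    result = compile_notebook(
        "example.ipynb",
        notebook_metadata_overrides={"pipeline_name": "example-pipeline"},
        debug=True,
    )
    # `result` carries the compiled package path and the resolved metadata.
    kfp_utils.upload_pipeline(
        pipeline_package_path=result["pipeline_package_path"],
        pipeline_name=result["pipeline_metadata"]["pipeline_name"],
        host=None,  # or "<host>:<port>" to target a specific KFP endpoint
    )
    return result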
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)
    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
def test_pipeline_generation_from_github(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_url = EX_REPO + "titanic-ml-dataset/titanic_dataset_ml.ipynb"
    # download notebook to tmp dir
    notebook_path, response = urlretrieve(notebook_url)
    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/', 'titanic.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'

    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)
    kale = Kale(source_notebook_path=notebook_path)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)
    # TODO: Need to suppress log generation when running tests
    os.remove(os.path.join(os.getcwd(), 'kale.log'))

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    expected_result = open(target_asset).read()
    result = open(script_path).read()
    assert result == expected_result
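# These tests are golden-file comparisons: the generated KFP DSL is compared
# byte-for-byte against the assets under ../assets/kfp_dsl/. The
# `random_string` and `abs_working_dir` arguments are mocks (the original
# files presumably patch them via mock.patch decorators, an assumption), so
# the generated code is deterministic. A typical invocation would be:
#
#     pytest -k "pipeline_generation" -v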