Example #1
File: nb.py Project: sylus/kale
def compile_notebook(request, source_notebook_path,
                     notebook_metadata_overrides=None, debug=False):
    """Compile the notebook to KFP DSL."""
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug)
    instance.logger = request.log if hasattr(request, "log") else logger

    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)

    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfputils.compile_pipeline(script_path, pipeline_name)

    return {"pipeline_package_path": os.path.relpath(package_path),
            "pipeline_metadata": instance.pipeline_metadata}
Example #2
def main():
    parser = argparse.ArgumentParser(description=ARGS_DESC, formatter_class=RawTextHelpFormatter)
    general_group = parser.add_argument_group('General')
    general_group.add_argument('--nb', type=str, help='Path to source JupyterNotebook', required=True)
    # use store_const instead of store_true so the value is None (not False) when the flag is missing
    general_group.add_argument('--upload_pipeline', action='store_const', const=True)
    general_group.add_argument('--run_pipeline', action='store_const', const=True)
    general_group.add_argument('--debug', action='store_true')

    metadata_group = parser.add_argument_group('Notebook Metadata Overrides', METADATA_GROUP_DESC)
    metadata_group.add_argument('--experiment_name', type=str, help='Name of the created experiment')
    metadata_group.add_argument('--pipeline_name', type=str, help='Name of the deployed pipeline')
    metadata_group.add_argument('--pipeline_description', type=str, help='Description of the deployed pipeline')
    metadata_group.add_argument('--docker_image', type=str, help='Docker base image used to build the pipeline steps')
    metadata_group.add_argument('--kfp_host', type=str, help='KFP endpoint. Provide address as <host>:<port>.')

    args = parser.parse_args()

    # retrieve the 'Notebook Metadata Overrides' argument group
    metadata_overrides_group = next(filter(lambda x: x.title == 'Notebook Metadata Overrides', parser._action_groups))
    # map each argument of that group to its parsed value (None when not given)
    metadata_overrides_group_dict = {a.dest: getattr(args, a.dest, None) for a in metadata_overrides_group._group_actions}

    kale = Kale(
        source_notebook_path=args.nb,
        notebook_metadata_overrides=metadata_overrides_group_dict,
        debug=args.debug
    )
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph, pipeline_parameters)
    # compile the pipeline to a KFP tar package
    pipeline_package_path = kfp_utils.compile_pipeline(script_path, kale.pipeline_metadata['pipeline_name'])

    if args.upload_pipeline:
        kfp_utils.upload_pipeline(
            pipeline_package_path=pipeline_package_path,
            pipeline_name=kale.pipeline_metadata['pipeline_name'],
            host=kale.pipeline_metadata.get('kfp_host', None)
        )

    if args.run_pipeline:
        kfp_utils.run_pipeline(
            run_name=kale.pipeline_metadata['pipeline_name'] + '_run',
            experiment_name=kale.pipeline_metadata['experiment_name'],
            pipeline_package_path=pipeline_package_path,
            host=kale.pipeline_metadata.get('kfp_host', None)
        )
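
The group lookup above reaches into argparse's private _action_groups and _group_actions attributes. A sketch of an alternative that avoids private API by listing the metadata destinations explicitly (the tuple simply repeats the argument names registered above):

METADATA_DESTS = ('experiment_name', 'pipeline_name', 'pipeline_description',
                  'docker_image', 'kfp_host')

def metadata_overrides(args):
    """Map each metadata destination to its parsed value (None if absent)."""
    return {dest: getattr(args, dest, None) for dest in METADATA_DESTS}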
Example #3
File: nb.py Project: zlapp/kale
def compile_notebook(source_notebook_path,
                     notebook_metadata_overrides=None,
                     debug=False,
                     auto_snapshot=False):
    instance = Kale(source_notebook_path, notebook_metadata_overrides, debug,
                    auto_snapshot)
    pipeline_graph, pipeline_parameters = instance.notebook_to_graph()
    script_path = instance.generate_kfp_executable(pipeline_graph,
                                                   pipeline_parameters)

    pipeline_name = instance.pipeline_metadata["pipeline_name"]
    package_path = kfp_utils.compile_pipeline(script_path, pipeline_name)

    return {
        "pipeline_package_path": package_path,
        "pipeline_metadata": instance.pipeline_metadata
    }
Example #4
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)

    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
Example #5
def test_pipeline_generation_from_github(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_url = EX_REPO + "titanic-ml-dataset/titanic_dataset_ml.ipynb"
    # download the notebook to a temporary file
    notebook_path, headers = urlretrieve(notebook_url)

    kale = Kale(source_notebook_path=notebook_path)
    kale.logger = logging.getLogger(__name__)
    kale.logger.setLevel(logging.DEBUG)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/', 'titanic.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
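
Both tests above take random_string and abs_working_dir as parameters, which suggests mock.patch decorators were stripped when the examples were extracted. A hedged reconstruction (the patch target paths are assumptions, not confirmed Kale module paths):

from unittest import mock

# Decorators apply bottom-up: the decorator nearest the function supplies
# the first mock parameter. Target paths are assumed for illustration only.
@mock.patch('kale.utils.pod_utils.abs_working_dir')
@mock.patch('kale.utils.utils.random_string')
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    ...  # body as shown above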
Example #6
def test_pipeline_generation_from_local(random_string, abs_working_dir):
    """Test code generation end to end from notebook to DSL."""
    abs_working_dir.return_value = '/kale'
    random_string.return_value = 'rnd'
    notebook_path = "../assets/notebooks/pipeline_parameters_and_metrics.ipynb"
    notebook_path = os.path.join(THIS_DIR, notebook_path)

    kale = Kale(source_notebook_path=notebook_path)
    pipeline_graph, pipeline_parameters = kale.notebook_to_graph()
    script_path = kale.generate_kfp_executable(pipeline_graph,
                                               pipeline_parameters,
                                               save_to_tmp=True)
    # TODO: Need to suppress log generation when running tests
    os.remove(os.path.join(os.getcwd(), 'kale.log'))

    target_asset = os.path.join(THIS_DIR, '../assets/kfp_dsl/',
                                'pipeline_parameters_and_metrics.py')
    with open(target_asset) as f:
        expected_result = f.read()
    with open(script_path) as f:
        result = f.read()
    assert result == expected_result
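
The TODO in Example #6 asks for log generation to be suppressed during tests. A minimal sketch of one approach, assuming Kale logs through a logger named 'kale' (an assumption, not a confirmed detail):

import logging

def silence_kale_file_logging():
    """Remove FileHandlers from the (assumed) 'kale' logger so that test
    runs leave no kale.log behind."""
    logger = logging.getLogger('kale')
    for handler in list(logger.handlers):
        if isinstance(handler, logging.FileHandler):
            handler.close()
            logger.removeHandler(handler)

Invoked from a test fixture, this would replace the os.remove cleanup above.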