Example #1
def run_notebook(id, run_name=None, parameters: dict = None):  # noqa: E501
    """run_notebook

    :param id: notebook id
    :type id: str
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to notebook name
    :type run_name: str
    :param parameters: optional run parameters, may be required based on pipeline definition
    :type parameters: dict

    :rtype: ApiRunCodeResponse
    """
    if not parameters and connexion.request.is_json:
        parameter_dict = dict(connexion.request.get_json())  # noqa: E501
    else:
        parameter_dict = parameters

    api_notebook, status_code = get_notebook(id)

    if status_code > 200:
        return f"Notebook with id '{id}' does not exist", 404

    # # TODO: Elyra kfp-notebook currently does not pass parameters on to papermill
    # if parameters:
    #     raise ApiError("The 'elyra-ai/kfp-notebook' executor does not support parameters", 422)

    # parameter_errors, status_code = validate_parameters(api_notebook.parameters, parameter_dict)
    #
    # if parameter_errors:
    #     return parameter_errors, status_code

    try:
        run_id = run_notebook_in_experiment(notebook=api_notebook,
                                            parameters=parameter_dict,
                                            run_name=run_name)

        # expected output notebook based on:
        #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
        notebook_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".ipynb"])
        # TODO: create a "sandboxed" notebook in a subfolder since Elyra overwrites
        #   the original notebook instead of creating an "-output.ipynb" file:
        #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L205
        notebook_output_url = notebook_url.replace(".ipynb", "-output.ipynb")

        # instead return link to the generated output .html for the time being
        notebook_output_html = notebook_url.replace(".ipynb", ".html")

        return ApiRunCodeResponse(
            run_url=f"/runs/details/{run_id}",
            run_output_location=notebook_output_html), 200
    except Exception as e:

        return f"Error while trying to run notebook {id}: {e}", 500
Example #2
def generate_notebook_run_script(api_notebook: ApiNotebook,
                                 parameters: dict = {},
                                 run_name: str = None,
                                 hide_secrets: bool = True):

    if "dataset_pvc" in parameters:
        template_file = "run_notebook_with_dataset.TEMPLATE.py"
    else:
        template_file = "run_notebook.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    notebook_file = api_notebook.url.split("/")[-1]

    requirements_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".txt"],
                                      file_name_filter="requirements")

    cos_dependencies_archive_url = get_object_url(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_extensions=[".tar.gz"],
                                                  file_name_filter="elyra-dependencies-archive")

    if not cos_dependencies_archive_url:

        tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                       prefix=f"notebooks/{api_notebook.id}/",
                                       file_extensions=[".ipynb"])

        cos_dependencies_archive_url = store_file(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_name="elyra-dependencies-archive.tar.gz",
                                                  file_content=bytes_io.getvalue())

    cos_dependencies_archive = cos_dependencies_archive_url.split("/")[-1]

    # TODO: move this into a ApiNotebook.image as opposed to parsing yaml here
    yaml_file_content = retrieve_file_content(bucket_name="mlpipeline",
                                              prefix=f"notebooks/{api_notebook.id}/",
                                              file_extensions=[".yaml", ".yml"])
    metadata_yaml = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    image = metadata_yaml["implementation"]["github"].get("image", "tensorflow/tensorflow:latest")

    # TODO: elyra-ai/kfp-notebook generates output notebook as: "-output.ipynb"
    #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
    #   so here we may consider renaming the generated file with a datetimestamp
    # output_folder = f"notebooks/{api_notebook.id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    # output_file_name = notebook_file_name.replace(r'.ipynb', '-output.ipynb')
    # output_file_path = f"{output_folder}/{output_file_name}"
    # output_file_url = f"http://{minio_host}:{minio_port}/mlpipeline/{output_file_path}"

    kfp_url = f"'{_pipeline_service_url}'" if "POD_NAMESPACE" not in os.environ else ""

    substitutions = {
        "name": api_notebook.name,
        "notebook": notebook_file,
        "cos_bucket": "mlpipeline",
        "cos_directory": f"notebooks/{api_notebook.id}/",
        "cos_dependencies_archive": cos_dependencies_archive,
        "cos_endpoint": "***",
        "cos_username": "******",
        "cos_password": "******",
        "requirements_url": requirements_url or "",
        "image": image,
        "pipeline_server": kfp_url,
        "run_name": run_name or api_notebook.name
    }

    # TODO: make the `dataset_pvc` and `mount_path` parameters part of the Swagger spec?
    if "dataset_pvc" in parameters:
        substitutions.update({
            "dataset_pvc": parameters["dataset_pvc"],
            "mount_path": parameters.get("mount_path", "/tmp/data")
        })

    if not hide_secrets:
        substitutions.update({
            "cos_endpoint": f"http://{minio_host}:{minio_port}/minio",
            "cos_username": minio_access_key,
            "cos_password": minio_secret_key
        })

    run_script = Template(template_raw).substitute(substitutions)

    return run_script
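
A minimal usage sketch, assuming an ApiNotebook with id, name, and url fields (all values below are placeholders):

# Hypothetical caller; the notebook fields and the dataset_pvc value are placeholders.
api_notebook = ApiNotebook(id="my-notebook-id",
                           name="My Notebook",
                           url="http://minio-service:9000/mlpipeline/notebooks/my-notebook-id/my_notebook.ipynb")

run_script = generate_notebook_run_script(api_notebook,
                                           parameters={"dataset_pvc": "my-dataset-pvc"},
                                           run_name="demo run")
# run_script now holds the rendered run_notebook_with_dataset.TEMPLATE.py code,
# with the COS credentials masked since hide_secrets defaults to True.
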
Example #3
def generate_dataset_run_script(dataset: ApiDataset, dataset_template_url, run_parameters=dict(),
                                run_name: str = None, fail_on_missing_prereqs=False):

    name = f"{dataset.name} ({generate_id(length=4)})"
    description = dataset.description.strip().replace("'", "\\'")

    # TODO: some of the parameters, template URLs should move out of here

    # dataset_parameters = dataset.parameters
    # TODO: ApiParameters should not be defined here
    dataset_parameters = [ApiParameter(name="action", default="create"),
                          ApiParameter(name="namespace", default=_namespace)]

    pipeline_method_args = generate_pipeline_method_args(dataset_parameters)

    parameter_names = ",".join([p.name for p in dataset_parameters])

    # TODO: the action parameter is required by DLF-to-PVC op, so it should not be dynamically generated here
    parameter_dict = {
        "action": "create",
        "namespace": run_parameters.get("namespace", _namespace)
    }

    # see component name at https://github.com/machine-learning-exchange/mlx/blob/main/components/component-samples/dax-to-dlf/component.yaml#L1
    dax_to_dlf_component_id = generate_id(name="Generate Dataset Metadata")

    # see component name at https://github.com/machine-learning-exchange/mlx/blob/main/components/component-samples/dlf/component.yaml#L1
    dlf_to_pvc_component_id = generate_id(name="Create Dataset Volume")

    dax_to_dlf_component_url = get_object_url(bucket_name="mlpipeline",
                                              prefix=f"components/{dax_to_dlf_component_id}/",
                                              file_extensions=[".yaml"])

    dlf_to_pvc_component_url = get_object_url(bucket_name="mlpipeline",
                                              prefix=f"components/{dlf_to_pvc_component_id}/",
                                              file_extensions=[".yaml"])

    if fail_on_missing_prereqs:

        if not dax_to_dlf_component_url:
            raise ApiError(f"Missing required component '{dax_to_dlf_component_id}'")

        if not dlf_to_pvc_component_url:
            raise ApiError(f"Missing required component '{dlf_to_pvc_component_id}'")

    namespace = run_parameters.get("namespace", _namespace)

    pipeline_server = "" if "POD_NAMESPACE" in os.environ else f"'{_pipeline_service_url}'"

    run_name = (run_name or "").replace("'", "\"") or dataset.name

    substitutions = dict(locals())

    template_file = f"run_dataset.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    template_rendered = Template(template_raw).substitute(substitutions)

    run_script = autopep8.fix_code(template_rendered, options={"aggressive": 2})

    return run_script
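
A minimal sketch of a call site, assuming an ApiDataset with name and description fields (all values below are placeholders):

# Hypothetical caller; the dataset fields and template URL are placeholders.
dataset = ApiDataset(id="my-dataset-id",
                     name="My Dataset",
                     description="Sample dataset used for illustration only")

run_script = generate_dataset_run_script(
    dataset,
    dataset_template_url="http://minio-service:9000/mlpipeline/datasets/my-dataset-id/template.yaml",
    run_parameters={"namespace": "kubeflow"},
    run_name="create dataset volume",
    fail_on_missing_prereqs=True)
# run_script is the autopep8-formatted render of run_dataset.TEMPLATE.py.
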
Example #4
def generate_custom_pipeline_function_body(custom_pipeline: ApiPipelineCustom, hide_secrets=True):

    function_body = """
    from kfp import components
    """

    component_template_raw = """
    ${comp_name} = components.load_component_from_url('${template_url}')
    ${op_name} = ${comp_name}(${component_args})
    """

    op_dependency_template_raw = """
    ${op_name}.after(${required_op_name})
    """

    for task in custom_pipeline.dag.tasks:
        parameters = []

        if task.artifact_type == "notebook":
            component_s3_prefix = f"components/jupyter/"
            notebook_url = get_object_url(bucket_name="mlpipeline",
                                          prefix=f"notebooks/{task.artifact_id}/",
                                          file_extensions=[".ipynb"])

            if not notebook_url:
                raise ApiError(f"Could not find notebook '{task.artifact_id}'")

            task_parameters = list(task.arguments.parameters) if task.arguments and task.arguments.parameters else []

            for p in task_parameters:
                if isinstance(p.value, str) and p.value.startswith("{{inputs.parameters."):
                    raise ApiError("Referencing '{{inputs.parameters.*}}' is not supported for notebook parameter"
                                   f" values: {task.to_dict()}", 422)

            notebook_parameters = {p.name: p.value or p.default for p in task_parameters}
            notebook_parameters_str = json.dumps(notebook_parameters) if notebook_parameters else ""

            jupyter_component_parameters = {
                "notebook_url": notebook_url,
                "notebook_params": notebook_parameters_str,
                "api_token": "",
                "endpoint_url": "",
                "bucket_name": "",
                "object_name": "",
                "access_key": "",
                "secret_access_key": ""
            }

            if not hide_secrets:
                output_folder = f"notebooks/{task.artifact_id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
                notebook_file_name = notebook_url.split("/")[-1]
                output_file_name = notebook_file_name.replace(r'.ipynb', '_out.ipynb')
                output_file_path = f"{output_folder}/{output_file_name}"
                output_bucket = "mlpipeline"

                jupyter_component_parameters.update({
                    "endpoint_url": "minio-service:9000",  # f"{minio_host}:{minio_port}",
                    "bucket_name": output_bucket,
                    "object_name": output_file_path,
                    "access_key": minio_access_key,
                    "secret_access_key": minio_secret_key
                })

            for name, value in jupyter_component_parameters.items():
                parameters.append(f"{name} = '{value}'")

        elif task.artifact_type == "component":
            component_s3_prefix = f"components/{task.artifact_id}/"

            # replace parameter values that reference pipeline input parameters {{inputs.parameters.parameter_name}}
            task_parameters = list(task.arguments.parameters) if task.arguments and task.arguments.parameters else []

            missing_parameter_values = [p.name for p in task_parameters
                                        if not p.value and not p.default and p.description
                                        and p.description.title().startswith("Required")]

            if missing_parameter_values:
                raise ApiError(f"Missing required task parameters {missing_parameter_values}", 422)

            for p in task_parameters:

                if isinstance(p.value, str) and p.value.startswith("{{inputs.parameters."):
                    match = re.match(r"{{inputs.parameters.(?P<pipeline_parameter_name>\w+)}}", p.value)

                    if not match:
                        raise ApiError(f"Cannot match pipeline input.parameter '{p.value}'", 422)

                    pipeline_param_ref = match.groupdict().get("pipeline_parameter_name")
                    parameters.append(f"{p.name} = {pipeline_param_ref}")

                else:
                    arg = generate_method_arg_from_parameter(p)
                    parameters.append(arg)

        else:
            raise ApiError(f"Unknown or unsupported artifact_type '{task.artifact_type}':\n'{task}'", 422)

        comp_name = "comp_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower()
        op_name = "op_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower()

        template_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=component_s3_prefix,
                                      file_extensions=[".yaml", ".yml"])

        if not template_url:
            raise ApiError(f"Could not find component template '{component_s3_prefix}'")

        substitutions = {
            "comp_name": comp_name,
            "op_name": op_name,
            "template_url": template_url,
            "component_args": ", ".join(parameters)
        }
        template_rendered = Template(component_template_raw).substitute(substitutions)
        function_body += template_rendered

    for task in custom_pipeline.dag.tasks:
        for required_task_name in task.dependencies or []:
            substitutions = {
                "op_name": "op_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower(),
                "required_op_name": "op_" + re.sub(r"\W+", "_", required_task_name, flags=re.ASCII).lower()
            }
            template_rendered = Template(op_dependency_template_raw).substitute(substitutions)
            function_body += template_rendered

    return function_body
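
For illustration only, a hypothetical two-task pipeline (a "train model" component followed by an "evaluate model" component that depends on it) would produce a function body roughly like the following; the component URLs and argument values are placeholders, and the leading indentation is intentional since the body is spliced into a generated pipeline function:

    # Illustrative generated output only; URLs and arguments are placeholders.
    from kfp import components

    comp_train_model = components.load_component_from_url('http://minio-service:9000/mlpipeline/components/<train-component-id>/component.yaml')
    op_train_model = comp_train_model(model_name = 'my-model')

    comp_evaluate_model = components.load_component_from_url('http://minio-service:9000/mlpipeline/components/<evaluate-component-id>/component.yaml')
    op_evaluate_model = comp_evaluate_model(model_path = 'models/my-model')

    op_evaluate_model.after(op_train_model)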