import os
import tarfile
from io import BytesIO
from os.path import join
from string import Template

import yaml

# Project-specific helpers and settings (create_tarfile, get_object_url, store_file,
# retrieve_file_content, generate_model_code, generate_dataset_code, ApiNotebook,
# ApiGenerateModelCodeResponse, CODE_TEMPLATE_DIR, the minio_* settings and
# _pipeline_service_url) are imported from elsewhere in this application.


def download_pipeline_files(id):  # noqa: E501
    """Returns the pipeline YAML compressed into a .tgz (.tar.gz) file.

    :param id: Id of the pipeline
    :type id: str

    :rtype: file
    """
    tar, bytes_io = create_tarfile(bucket_name="mlpipeline", prefix=f"pipelines/{id}",
                                   file_extensions=[""],
                                   keep_open=False)

    if len(tar.members) == 0:
        return f"Could not find pipeline with id '{id}'", 404

    return bytes_io.getvalue(), 200, {"Content-Disposition": f"attachment; filename={id}.tgz"}
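

# The download handlers in this module rely on a create_tarfile helper that bundles
# matching objects from object storage into an in-memory .tar.gz. Below is a minimal
# sketch of such a helper, assuming the artifacts live in a MinIO bucket and that a
# configured minio.Minio client is passed in; the project's real helper may differ.
def create_tarfile_sketch(client, bucket_name, prefix, file_extensions=("",),
                          keep_open=False):
    bytes_io = BytesIO()
    tar = tarfile.open(fileobj=bytes_io, mode="w:gz")

    for obj in client.list_objects(bucket_name, prefix=prefix, recursive=True):
        if not obj.object_name.endswith(tuple(file_extensions)):
            continue

        data = client.get_object(bucket_name, obj.object_name).read()
        tarinfo = tarfile.TarInfo(name=obj.object_name[len(prefix):].lstrip("/"))
        tarinfo.size = len(data)
        tar.addfile(tarinfo, BytesIO(data))

    if not keep_open:
        # Finalize the gzip stream so bytes_io.getvalue() yields a valid .tgz;
        # callers passing keep_open=True must call tar.close() themselves.
        tar.close()

    return tar, bytes_io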


def download_model_files(id, include_generated_code=None):  # noqa: E501
    """
    Returns the model artifacts compressed into a .tgz (.tar.gz) file.

    :param id: Id of the model
    :type id: str
    :param include_generated_code: Include generated run scripts in download
    :type include_generated_code: bool

    :rtype: file | binary
    """

    tar, bytes_io = create_tarfile(
        bucket_name="mlpipeline",
        prefix=f"models/{id}/",
        file_extensions=[".yaml", ".yml", ".py", ".md"],
        keep_open=include_generated_code)

    if len(tar.members) == 0:
        return f"Could not find model with id '{id}'", 404

    if include_generated_code:
        generate_code_response: ApiGenerateModelCodeResponse = generate_model_code(id)[0]

        for s in generate_code_response.scripts:
            file_name = f"run_{s.pipeline_stage}_{s.execution_platform}.py"

            if file_name in tar.getnames():
                file_name = file_name.replace(".py", "_generated.py")

            file_bytes = s.script_code.encode('utf-8')
            file_obj = BytesIO(file_bytes)
            tarinfo = tarfile.TarInfo(name=file_name)
            tarinfo.size = len(file_bytes)  # size of the encoded bytes, not the str length

            tar.addfile(tarinfo, file_obj)

        tar.close()

    return bytes_io.getvalue(), 200, {
        "Content-Disposition": f"attachment; filename={id}.tgz"
    }


def download_dataset_files(id, include_generated_code=None):  # noqa: E501
    """
    Returns the dataset artifacts compressed into a .tgz (.tar.gz) file.

    :param id: Id of the dataset
    :type id: str
    :param include_generated_code: Include generated run script in download
    :type include_generated_code: bool

    :rtype: file | binary
    """
    tar, bytes_io = create_tarfile(
        bucket_name="mlpipeline",
        prefix=f"datasets/{id}/",
        file_extensions=[".yaml", ".yml", ".py", ".md"],
        keep_open=include_generated_code)

    if len(tar.members) == 0:
        return f"Could not find dataset with id '{id}'", 404

    if include_generated_code:
        generate_code_response, api_status = generate_dataset_code(id)

        if api_status == 200:
            file_bytes = generate_code_response.script.encode('utf-8')
            file_name = "run_dataset.py"

            if file_name in tar.getnames():
                file_name = file_name.replace(".py", "_generated.py")

            tarinfo = tarfile.TarInfo(name=file_name)
            tarinfo.size = len(file_bytes)  # size of the encoded bytes, not the str length
            file_obj = BytesIO(file_bytes)

            tar.addfile(tarinfo, file_obj)

        tar.close()

    return bytes_io.getvalue(), 200, {
        "Content-Disposition": f"attachment; filename={id}.tgz"
    }
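

# Hedged client-side sketch of how a caller might consume one of these download
# endpoints: fetch the archive over HTTP and unpack it locally. The endpoint path
# below is hypothetical; substitute the route your API server actually exposes.
def fetch_and_extract_dataset(base_url: str, dataset_id: str, target_dir: str = "."):
    import requests  # assumed to be available in the client environment

    response = requests.get(f"{base_url}/datasets/{dataset_id}/download",
                            params={"include_generated_code": "true"})
    response.raise_for_status()

    with tarfile.open(fileobj=BytesIO(response.content), mode="r:gz") as tar:
        # The archive holds the dataset's .yaml/.py/.md artifacts plus, when requested,
        # the generated run_dataset.py script.
        tar.extractall(path=target_dir)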


def generate_notebook_run_script(api_notebook: ApiNotebook,
                                 parameters: dict = None,
                                 run_name: str = None,
                                 hide_secrets: bool = True):

    parameters = parameters or {}  # avoid the mutable default-argument pitfall

    if "dataset_pvc" in parameters:
        template_file = "run_notebook_with_dataset.TEMPLATE.py"
    else:
        template_file = "run_notebook.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    notebook_file = api_notebook.url.split("/")[-1]

    requirements_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".txt"],
                                      file_name_filter="requirements")

    cos_dependencies_archive_url = get_object_url(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_extensions=[".tar.gz"],
                                                  file_name_filter="elyra-dependencies-archive")

    if not cos_dependencies_archive_url:

        tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                       prefix=f"notebooks/{api_notebook.id}/",
                                       file_extensions=[".ipynb"])

        cos_dependencies_archive_url = store_file(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_name="elyra-dependencies-archive.tar.gz",
                                                  file_content=bytes_io.getvalue())

    cos_dependencies_archive = cos_dependencies_archive_url.split("/")[-1]

    # TODO: move this into a ApiNotebook.image as opposed to parsing yaml here
    yaml_file_content = retrieve_file_content(bucket_name="mlpipeline",
                                              prefix=f"notebooks/{api_notebook.id}/",
                                              file_extensions=[".yaml", ".yml"])
    metadata_yaml = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    image = metadata_yaml["implementation"]["github"].get("image", "tensorflow/tensorflow:latest")

    # TODO: elyra-ai/kfp-notebook generates output notebook as: "-output.ipynb"
    #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
    #   so here we may consider renaming the generated file with a datetimestamp
    # output_folder = f"notebooks/{api_notebook.id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    # output_file_name = notebook_file_name.replace(r'.ipynb', '-output.ipynb')
    # output_file_path = f"{output_folder}/{output_file_name}"
    # output_file_url = f"http://{minio_host}:{minio_port}/mlpipeline/{output_file_path}"

    kfp_url = f"'{_pipeline_service_url}'" if "POD_NAMESPACE" not in os.environ else ""

    substitutions = {
        "name": api_notebook.name,
        "notebook": notebook_file,
        "cos_bucket": "mlpipeline",
        "cos_directory": f"notebooks/{api_notebook.id}/",
        "cos_dependencies_archive": cos_dependencies_archive,
        "cos_endpoint": "***",
        "cos_username": "******",
        "cos_password": "******",
        "requirements_url": requirements_url or "",
        "image": image,
        "pipeline_server": kfp_url,
        "run_name": run_name or api_notebook.name
    }

    # TODO: make the `dataset_pvc` and `mount_path` parameters part of the Swagger spec?
    if "dataset_pvc" in parameters:
        substitutions.update({
            "dataset_pvc": parameters["dataset_pvc"],
            "mount_path": parameters.get("mount_path", "/tmp/data")
        })

    if not hide_secrets:
        substitutions.update({
            "cos_endpoint": f"http://{minio_host}:{minio_port}/minio",
            "cos_username": minio_access_key,
            "cos_password": minio_secret_key
        })

    run_script = Template(template_raw).substitute(substitutions)

    return run_script
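

# Hedged usage sketch: render a run script for a stored notebook and write it to disk.
# `get_notebook` is a hypothetical lookup returning the ApiNotebook for a given id;
# substitute whatever accessor your application actually provides.
def write_notebook_run_script(notebook_id: str, output_path: str = None) -> str:
    api_notebook = get_notebook(notebook_id)  # hypothetical helper, see note above

    run_script = generate_notebook_run_script(api_notebook,
                                              run_name=f"{api_notebook.name} run",
                                              hide_secrets=True)

    output_path = output_path or f"run_{notebook_id}.py"

    with open(output_path, "w") as f:
        f.write(run_script)

    return output_path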