Example #1
File: kfputils.py Project: noushi/kale
def update_uimetadata(artifact_name,
                      uimetadata_path='/mlpipeline-ui-metadata.json'):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    # Default empty ui-metadata dict
    outputs = {"outputs": []}
    if os.path.exists(uimetadata_path):
        try:
            with open(uimetadata_path, 'r') as f:
                outputs = json.loads(f.read())
            if not outputs.get('outputs', None):
                outputs['outputs'] = []
        except json.JSONDecodeError as e:
            print("Failed to parse json file {}: {}\n"
                  "This step will not be able to visualize artifacts in the"
                  " KFP UI".format(uimetadata_path, e))

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
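A minimal usage sketch for this helper (the artifact name is hypothetical; the call assumes it runs inside a KFP pod, so the pod and workflow lookups succeed, and that a matching report.tgz was uploaded to the run's MinIO artifact location):

# Hypothetical call from inside a pipeline step, after the step has
# uploaded 'report.tgz' as an artifact.
update_uimetadata('report')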
Example #2
def update_uimetadata(artifact_name,
                      uimetadata_path=KFP_UI_METADATA_FILE_PATH):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    try:
        outputs = get_current_uimetadata(uimetadata_path,
                                         default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not be able to visualize artifacts in the"
                  " KFP UI")
        return

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
Example #3
def is_kfp_step() -> bool:
    """Detect if running inside a KFP step.

    The detection involves two steps:

      1. Auto-detect if the current Pod is part of an Argo workflow
      2. Read one of the annotations that the KFP API Server sets in the
         workflow object (one-off runs and recurring ones have different
         annotations).
    """
    log.info("Checking if running inside a KFP step...")
    try:
        namespace = podutils.get_namespace()
        workflow = workflowutils.get_workflow(
            workflowutils.get_workflow_name(podutils.get_pod_name(),
                                            namespace), namespace)
        annotations = workflow["metadata"]["annotations"]
        try:
            _ = annotations[KFP_RUN_NAME_ANNOTATION_KEY]
        except KeyError:
            _ = annotations[KFP_SWF_NAME_ANNOTATION_KEY]
    except Exception:
        log.info("Not in a KFP step.")
        return False
    log.info("Running in a KFP step.")
    return True
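A hedged usage sketch: because the check swallows all lookup errors and returns a plain boolean, it can gate KFP-specific behavior in code shared between notebooks and pipeline steps (setup_kfp_integrations is a hypothetical helper):

if is_kfp_step():
    # Running as a pipeline step: enable KFP-specific integrations.
    setup_kfp_integrations()  # hypothetical helper
else:
    # Running interactively, e.g. inside a notebook server.
    log.info("Not a KFP step; skipping KFP integrations.")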
Example #4
def snapshot_pod(bucket=DEFAULT_BUCKET, wait=False, interactive=False):
    """Take a Rok snapshot of the current Pod."""
    rok = get_client()
    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    log.info("Taking a snapshot of pod %s in namespace %s ..." %
             (pod_name, namespace))
    commit_title = "Snapshot of pod {}".format(pod_name)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(
        pod_name, namespace)
    params = {
        "pod": pod_name,
        "default_container": podutils.get_container_name(),
        "namespace": namespace,
        "commit_title": commit_title,
        "commit_message": commit_message
    }

    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)

    task_info = rok.version_register(bucket,
                                     pod_name,
                                     "pod",
                                     params,
                                     wait=wait and not interactive)
    if wait:
        if interactive:
            task_id = task_info["task"]["id"]
            return monitor_snapshot_task(task_id)
        else:
            log.info("Successfully took Rok snapshot")
    return task_info
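A usage sketch, assuming Rok is reachable and that a completed task carries the same result shape used by the other snapshot helpers in this collection:

# Block until the snapshot completes, then read the registered version.
task_info = snapshot_pod(wait=True)
version = task_info["task"]["result"]["event"]["version"]
log.info("Pod snapshot registered as version %s", version)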
Example #5
    def __init__(self):
        log.info("%s Initializing MLMD context... %s", "-" * 10, "-" * 10)
        log.info("Connecting to MLMD...")
        self.store = self._connect()
        log.info("Successfully connected to MLMD")
        log.info("Getting step details...")
        log.info("Getting pod name...")
        self.pod_name = podutils.get_pod_name()
        log.info("Successfully retrieved pod name: %s", self.pod_name)
        log.info("Getting pod namespace...")
        self.pod_namespace = podutils.get_namespace()
        log.info("Successfully retrieved pod namespace: %s",
                 self.pod_namespace)
        log.info("Getting pod...")
        self.pod = podutils.get_pod(self.pod_name, self.pod_namespace)
        log.info("Successfully retrieved pod")
        log.info("Getting workflow name from pod...")
        self.workflow_name = self.pod.metadata.labels.get(
            workflowutils.ARGO_WORKFLOW_LABEL_KEY)
        log.info("Successfully retrieved workflow name: %s",
                 self.workflow_name)
        log.info("Getting workflow...")
        self.workflow = workflowutils.get_workflow(self.workflow_name,
                                                   self.pod_namespace)
        log.info("Successfully retrieved workflow")

        workflow_labels = self.workflow["metadata"].get("labels", {})
        self.run_uuid = workflow_labels.get(podutils.KFP_RUN_ID_LABEL_KEY,
                                            self.workflow_name)
        log.info("Successfully retrieved KFP run ID: %s", self.run_uuid)

        workflow_annotations = self.workflow["metadata"].get("annotations", {})
        pipeline_spec = json.loads(
            workflow_annotations.get("pipelines.kubeflow.org/pipeline_spec",
                                     "{}"))
        self.pipeline_name = pipeline_spec.get("name", self.workflow_name)
        if self.pipeline_name:
            log.info("Successfully retrieved KFP pipeline_name: %s",
                     self.pipeline_name)
        else:
            log.info("Could not retrieve KFP pipeline name")

        self.component_id = podutils.compute_component_id(self.pod)
        self.execution_hash = self.pod.metadata.annotations.get(
            MLMD_EXECUTION_HASH_PROPERTY_KEY)
        if self.execution_hash:
            log.info("Successfully retrieved execution hash: %s",
                     self.execution_hash)
        else:
            self.execution_hash = utils.random_string(10)
            log.info(
                "Failed to retrieve execution hash."
                " Generating random string...: %s", self.execution_hash)

        self.run_context = self._get_or_create_run_context()
        self.execution = self._create_execution_in_run_context()
        self._label_with_context_and_execution()
        log.info("%s Successfully initialized MLMD context %s", "-" * 10,
                 "-" * 10)
Example #6
File: nb.py Project: ydataai/kale
def find_poddefault_labels_on_server(request):
    """Find server's labels that correspond to poddefaults applied."""
    request.log.info("Retrieving PodDefaults applied to server...")
    applied_poddefaults = kfutils.find_applied_poddefaults(
        podutils.get_pod(podutils.get_pod_name(), podutils.get_namespace()),
        kfutils.list_poddefaults())
    pd_names = [pd["metadata"]["name"] for pd in applied_poddefaults]
    request.log.info("Retrieved applied PodDefaults: %s", pd_names)

    labels = kfutils.get_poddefault_labels(applied_poddefaults)
    request.log.info("PodDefault labels applied on server: %s",
                     ", ".join(["%s: %s" % (k, v) for k, v in labels.items()]))
    return labels
Example #7
def detect_run_uuid() -> str:
    """Get the workflow's UUID form inside a pipeline step."""
    namespace = podutils.get_namespace()
    workflow = workflowutils.get_workflow(
        workflowutils.get_workflow_name(podutils.get_pod_name(), namespace),
        namespace)
    run_uuid = workflow["metadata"].get("labels", {}).get(
        KFP_RUN_ID_LABEL_KEY, None)

    # KFP api-server adds run UUID as label to workflows for KFP>=0.1.26.
    # Return run UUID if available. Else return workflow UUID to maintain
    # backwards compatibility.
    return run_uuid or workflow["metadata"]["uid"]
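A usage sketch: the returned UUID can link back to the run in the KFP UI (the frontend route below is the conventional KFP path and should be treated as an assumption):

run_uuid = detect_run_uuid()
# Conventional KFP UI route for a run's details page (assumption).
run_url = "/pipeline/#/runs/details/{}".format(run_uuid)
log.info("KFP run: %s", run_url)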
Example #8
def snapshot_notebook(bucket=DEFAULT_BUCKET, obj=None):
    """Take a Rok snapshot of the current Notebook."""
    rok = get_client()
    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    log.info("Taking a snapshot of notebook %s in namespace %s ..."
             % (pod_name, namespace))
    commit_title = "Snapshot of notebook {}".format(pod_name)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(pod_name,
                                                             namespace)
    params = {"namespace": namespace,
              "commit_title": commit_title,
              "commit_message": commit_message}

    obj = obj or pod_name
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    return rok.version_register(bucket, obj, "jupyter", params)
Example #9
File: rok.py Project: sylus/kale
def snapshot_notebook(request, bucket=DEFAULT_BUCKET, obj=None):
    """Perform a snapshot over the notebook's pod."""
    rok = _get_client()
    hostname = os.getenv("HOSTNAME")
    namespace = podutils.get_namespace()
    commit_title = "Snapshot of notebook {}".format(hostname)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(
        hostname, namespace)
    params = {
        "namespace": namespace,
        "commit_title": commit_title,
        "commit_message": commit_message
    }

    obj = obj or podutils.get_pod_name()
    # Create the bucket in case it does not exist
    podutils.create_rok_bucket(bucket, client=rok)
    return rok.version_register(bucket, obj, "jupyter", params)
Example #10
File: rok.py Project: sylus/kale
def check_rok_availability(request):
    """Check if Rok is available."""
    log = request.log if hasattr(request, "log") else logger
    try:
        rok = _get_client()
    except ImportError:
        log.exception("Failed to import RokClient")
        raise RPCNotFoundError(details="Rok Gateway Client module not found",
                               trans_id=request.trans_id)
    except Exception:
        log.exception("Failed to initialize RokClient")
        raise RPCServiceUnavailableError(details=("Failed to initialize"
                                                  " RokClient"),
                                         trans_id=request.trans_id)

    try:
        rok.account_info()
    except Exception:
        log.exception("Failed to retrieve account information")
        raise RPCServiceUnavailableError(details="Failed to access Rok",
                                         trans_id=request.trans_id)

    name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    try:
        suggestions = rok.version_register_suggest(DEFAULT_BUCKET,
                                                   name,
                                                   "jupyter",
                                                   "params:lab",
                                                   {"namespace": namespace},
                                                   ignore_env=True)
    except Exception as e:
        log.exception("Failed to list lab suggestions")
        message = "%s: %s" % (e.__class__.__name__, e)
        raise RPCServiceUnavailableError(message=message,
                                         details=("Rok cannot list notebooks"
                                                  " in this namespace"),
                                         trans_id=request.trans_id)

    if not any(s["value"] == name for s in suggestions):
        log.error("Could not find notebook '%s' in list of suggestions", name)
        raise RPCNotFoundError(details=("Could not find this notebook in"
                                        " notebooks listed by Rok"),
                               trans_id=request.trans_id)
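A hedged sketch of how a caller might gate snapshot operations on this check, reusing the snapshot_notebook RPC from the same file and the error classes raised above:

# Verify Rok is reachable before attempting a snapshot.
try:
    check_rok_availability(request)
except (RPCNotFoundError, RPCServiceUnavailableError):
    request.log.warning("Rok unavailable; skipping snapshot")
else:
    snapshot_notebook(request)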
Example #11
def update_uimetadata(artifact_name,
                      uimetadata_path=KFP_UI_METADATA_FILE_PATH):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    log.info("Adding artifact '%s' to KFP UI metadata...", artifact_name)
    try:
        outputs = get_current_uimetadata(uimetadata_path,
                                         default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not be able to visualize artifacts in the"
                  " KFP UI")
        return

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry

    try:
        utils.ensure_or_create_dir(uimetadata_path)
    except RuntimeError:
        log.exception(
            "Writing to '%s' failed. This step will not be able to"
            " visualize artifacts in the KFP UI.", uimetadata_path)
        return
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
    log.info("Artifact successfully added")
Example #12
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Take a snapshot of a pipeline step with Rok."""
    # Mark the start of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s", "-" * 10,
             "before" if before else "after", "-" * 10)

    log.info("Retrieving KFP run ID...")
    run_uuid = podutils.get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_uuid)
    bucket = kfputils.get_experiment_from_run_id(run_uuid).name
    obj = "{}-{}".format(pipeline, run_uuid)
    commit_title = "Step: {} ({})".format(step, "start" if before else "end")
    commit_message = "Autosnapshot {} step '{}' of pipeline run '{}'".format(
        "before" if before else "after", step, run_uuid)
    environment = json.dumps({
        "KALE_PIPELINE_STEP": step,
        "KALE_NOTEBOOK_PATH": nb_path,
        "KALE_SNAPSHOT_FINAL": not before
    })
    metadata = json.dumps({
        "environment": environment,
        "kfp_runid": kfputils.format_kfp_run_id_uri(run_uuid),
        "state": "initial" if before else "final"
    })
    params = {
        "pod": podutils.get_pod_name(),
        "metadata": metadata,
        "default_container": "main",
        "commit_title": commit_title,
        "commit_message": commit_message
    }
    rok = get_client()
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    log.info("Registering Rok version for '%s/%s'...", bucket, obj)
    task_info = rok.version_register(bucket, obj, "pod", params, wait=True)
    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    url_path = (
        "/rok/buckets/%s/files/%s/versions/%s?ns=%s" %
        (utils.encode_url_component(bucket), utils.encode_url_component(obj),
         utils.encode_url_component(version),
         utils.encode_url_component(podutils.get_namespace())))
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the beginning"
                 " of this step by spawning a new notebook from the following"
                 " Rok snapshot:")
    log.info("%s", url_path)

    reproduce_steps = ("To **explore the execution state** at the **%s** of"
                       " this step follow the instructions below:\n\n"
                       "1\\. View the [snapshot in the Rok UI](%s).\n\n"
                       "2\\. Copy the Rok URL.\n\n"
                       "3\\. Create a new Notebook Server by using this Rok"
                       " URL to autofill the form.")

    if before:
        md_source = (("# Rok autosnapshot\n"
                      "Rok has successfully created a snapshot for step `%s`."
                      "\n\n" + reproduce_steps) %
                     (step, "beginning", url_path))
    else:
        md_source = (("# Rok final autosnapshot\n"
                      "Rok has successfully created a snapshot **after** the"
                      " execution of step `%s`.\n\n" + reproduce_steps) %
                     (step, "end", url_path))

    try:
        metadataui = kfputils.get_current_uimetadata(default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not create a Rok markdown artifact.")
    else:
        metadataui["outputs"].append({
            "storage": "inline",
            "source": md_source,
            "type": "markdown"
        })
        with open(kfputils.KFP_UI_METADATA_FILE_PATH, "w") as f:
            json.dump(metadataui, f)
    # Mark the end of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s", "-" * 10,
             "before" if before else "after", "-" * 10)

    return task_info
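A usage sketch of the before/after pairing this function is designed for (pipeline name, step name, and notebook path are illustrative):

# Hypothetical wrapper: snapshot right before and right after the
# user code of a step runs.
snapshot_pipeline_step("my-pipeline", "train", "/home/jovyan/nb.ipynb",
                       before=True)
run_step_body()  # hypothetical user code for the step
snapshot_pipeline_step("my-pipeline", "train", "/home/jovyan/nb.ipynb",
                       before=False)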
Example #13
def serve(model: Any,
          name: str = None,
          wait: bool = True,
          predictor: str = None,
          preprocessing_fn: Callable = None,
          preprocessing_assets: Dict = None) -> KFServer:
    """Main API used to serve models from a notebook or a pipeline step.

    This function procedurally deploys a KFServing InferenceService, starting
    from a model object. A summary list of actions follows:

    * Autogenerate an InferenceService name, if not provided
    * Process transformer function (and related assets)
    * Dump the model, to a path under a mounted PVC
    * Snapshot the PVC
    * Hydrate a new PVC from the new snapshot
    * Submit an InferenceService CR
    * Monitor the CR until it becomes ready

    FIXME: Improve documentation. Provide some examples in the docstring and
      explain how the preprocessing function parsing works.

    Args:
        model: Model object to be used as a predictor
        name (optional): Name of the predictor. Will be autogenerated if not
            provided
        wait (optional): Wait for the InferenceService to become ready.
            Default: True
        predictor (optional): Predictor type to be used for the
            InferenceService. If not provided it will be inferred using
            the matching marshalling backends.
        preprocessing_fn (optional): A processing function that will be
            deployed as a KFServing Transformer
        preprocessing_assets (optional): A dictionary with objects required by
            the preprocessing function. This is needed in case the
            preprocessing function references global objects.

    Returns: A KFServer instance
    """
    log.info("Starting serve procedure for model '%s'", model)
    if not name:
        name = "%s-%s" % (podutils.get_pod_name(), utils.random_string(5))

    # Validate and process transformer
    if preprocessing_fn:
        _prepare_transformer_assets(preprocessing_fn, preprocessing_assets)

    # Detect predictor type
    predictor_type = marshal.get_backend(model).predictor_type
    if predictor and predictor != predictor_type:
        raise RuntimeError("Trying to create an InferenceService with"
                           " predictor of type '%s' but the model is of type"
                           " '%s'" % (predictor, predictor_type))
    if not predictor_type:
        log.error(
            "Kale does not yet support serving objects with '%s'"
            " backend.\n\nPlease help us improve Kale by opening a new"
            " issue at:\n"
            "https://github.com/kubeflow-kale/kale/issues",
            marshal.get_backend(model).display_name)
        utils.graceful_exit(-1)
    predictor = predictor_type  # in case `predictor` is None

    volume = podutils.get_volume_containing_path(PVC_ROOT)
    volume_name = volume[1].persistent_volume_claim.claim_name
    log.info("Model is contained in volume '%s'", volume_name)

    # Dump the model
    marshal.set_data_dir(PREDICTOR_MODEL_DIR)
    model_filepath = marshal.save(model, "model")
    log.info("Model saved successfully at '%s'", model_filepath)

    # Take snapshot
    task_info = rokutils.snapshot_pvc(volume_name,
                                      bucket=rokutils.SERVING_BUCKET,
                                      wait=True)
    task = rokutils.get_task(task_info["task"]["id"],
                             bucket=rokutils.SERVING_BUCKET)
    new_pvc_name = "%s-pvc-%s" % (name, utils.random_string(5))
    rokutils.hydrate_pvc_from_snapshot(task["result"]["event"]["object"],
                                       task["result"]["event"]["version"],
                                       new_pvc_name,
                                       bucket=rokutils.SERVING_BUCKET)

    # Cleanup: remove dumped model and transformer assets from the current PVC
    utils.rm_r(
        os.path.join(PREDICTOR_MODEL_DIR, os.path.basename(model_filepath)))
    utils.rm_r(TRANSFORMER_ASSETS_DIR, silent=True)

    # Need an absolute path from the *root* of the PVC. Note that
    # str.lstrip() strips a character set, not a prefix, so use
    # os.path.relpath to remove the PVC_ROOT prefix reliably.
    pvc_model_path = "/" + os.path.relpath(PREDICTOR_MODEL_DIR, PVC_ROOT)
    # Tensorflow saves the model's files into a directory by itself
    if predictor == "tensorflow":
        pvc_model_path += "/" + os.path.basename(model_filepath).lstrip("/")

    kfserver = create_inference_service(
        name=name,
        predictor=predictor,
        pvc_name=new_pvc_name,
        model_path=pvc_model_path,
        transformer=preprocessing_fn is not None)

    if wait:
        monitor_inference_service(kfserver.name)
    return kfserver
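A minimal end-to-end sketch (assumes a scikit-learn model, a notebook pod with the expected PVC mounted, and Rok available; the training data is a toy placeholder):

from sklearn.linear_model import LogisticRegression

# Train a toy model, then deploy it as an InferenceService and wait
# until the endpoint becomes ready.
model = LogisticRegression().fit([[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1])
kfserver = serve(model, wait=True)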