def __init__(self, trans_id=None, nb_path=None):
    if not trans_id:
        trans_id = random_string(size=10)
    self.log = create_adapter(logging.getLogger(__name__), trans_id, nb_path)
    self.trans_id = trans_id
    self.nb_path = nb_path
def __init__(self):
    log.info("%s Initializing MLMD context... %s", "-" * 10, "-" * 10)
    log.info("Connecting to MLMD...")
    self.store = self._connect()
    log.info("Successfully connected to MLMD")
    log.info("Getting step details...")
    log.info("Getting pod name...")
    self.pod_name = podutils.get_pod_name()
    log.info("Successfully retrieved pod name: %s", self.pod_name)
    log.info("Getting pod namespace...")
    self.pod_namespace = podutils.get_namespace()
    log.info("Successfully retrieved pod namespace: %s", self.pod_namespace)
    log.info("Getting pod...")
    self.pod = podutils.get_pod(self.pod_name, self.pod_namespace)
    log.info("Successfully retrieved pod")
    log.info("Getting workflow name from pod...")
    self.workflow_name = self.pod.metadata.labels.get(
        workflowutils.ARGO_WORKFLOW_LABEL_KEY)
    log.info("Successfully retrieved workflow name: %s", self.workflow_name)
    log.info("Getting workflow...")
    self.workflow = workflowutils.get_workflow(self.workflow_name,
                                               self.pod_namespace)
    log.info("Successfully retrieved workflow")
    workflow_labels = self.workflow["metadata"].get("labels", {})
    self.run_uuid = workflow_labels.get(podutils.KFP_RUN_ID_LABEL_KEY,
                                        self.workflow_name)
    log.info("Successfully retrieved KFP run ID: %s", self.run_uuid)
    workflow_annotations = self.workflow["metadata"].get("annotations", {})
    pipeline_spec = json.loads(workflow_annotations.get(
        "pipelines.kubeflow.org/pipeline_spec", "{}"))
    self.pipeline_name = pipeline_spec.get("name", self.workflow_name)
    if self.pipeline_name:
        log.info("Successfully retrieved KFP pipeline_name: %s",
                 self.pipeline_name)
    else:
        log.info("Could not retrieve KFP pipeline name")
    self.component_id = podutils.compute_component_id(self.pod)
    self.execution_hash = self.pod.metadata.annotations.get(
        MLMD_EXECUTION_HASH_PROPERTY_KEY)
    if self.execution_hash:
        log.info("Successfully retrieved execution hash: %s",
                 self.execution_hash)
    else:
        self.execution_hash = utils.random_string(10)
        log.info("Failed to retrieve execution hash."
                 " Generating random string...: %s", self.execution_hash)
    self.run_context = self._get_or_create_run_context()
    self.execution = self._create_execution_in_run_context()
    self._label_with_context_and_execution()
    log.info("%s Successfully initialized MLMD context %s",
             "-" * 10, "-" * 10)
def save_pipeline(self, pipeline_code, output_path=None):
    """Save Python code to file."""
    if output_path is None:
        # create tmp path
        tmp_dir = tempfile.mkdtemp()
        filename = "kale_pipeline_code_{}.py".format(utils.random_string(5))
        output_path = os.path.join(tmp_dir, filename)
    with open(output_path, "w") as f:
        f.write(pipeline_code)
    self.logger.info("Pipeline code saved at {}".format(output_path))
    return output_path
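# A minimal usage sketch for save_pipeline(), assuming `compiler` is an
# instance of the (hypothetical) class that defines it. With no output_path,
# the code is written to a fresh temporary directory.
code = "def pipeline():\n    pass\n"
path = compiler.save_pipeline(code)  # e.g. /tmp/<tmpdir>/kale_pipeline_code_ab1cd.py
path = compiler.save_pipeline(code, "/tmp/pipeline.py")  # explicit destination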
def parse_metadata(notebook_metadata):
    """Parse the notebook's metadata and update it when needed.

    Args:
        notebook_metadata (dict): metadata annotated by Kale.
            Refer to DEFAULT_METADATA for defaults

    Returns (dict): updated and validated metadata
    """
    # check for required fields before adding all possible defaults
    validated_notebook_metadata = copy.deepcopy(notebook_metadata)
    for required in METADATA_REQUIRED_KEYS:
        if required not in validated_notebook_metadata:
            raise ValueError(
                "Key {} not found. Add this field either in"
                " the notebook metadata or as an override".format(required))

    metadata = copy.deepcopy(DEFAULT_METADATA)
    metadata.update(validated_notebook_metadata)

    if not re.match(KALE_STEP_NAME_REGEX, metadata['pipeline_name']):
        raise ValueError("Pipeline name {}".format(KALE_NAME_MSG))

    # update the pipeline name with a random string suffix
    random_pipeline_name = "{}-{}".format(metadata['pipeline_name'],
                                          random_string())
    metadata['pipeline_name'] = random_pipeline_name

    volumes = metadata.get('volumes', [])
    if isinstance(volumes, list):
        metadata.update({'volumes': _parse_volumes_metadata(volumes)})
    else:
        raise ValueError("Volumes spec must be a list")

    katib = metadata.get('katib', False)
    if not isinstance(katib, bool):
        raise ValueError("The field `katib` is not a boolean")
    if katib:
        _validate_katib_metadata(metadata.get("katib_metadata", {}))
        if not re.match(K8S_VALID_NAME_REGEX, metadata['experiment_name']):
            raise ValueError("When choosing HP Tuning, experiment name"
                             " {}".format(K8S_NAME_MSG))

    return metadata
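# A minimal sketch of calling parse_metadata(), assuming 'experiment_name'
# and 'pipeline_name' are among METADATA_REQUIRED_KEYS. Missing keys fall
# back to DEFAULT_METADATA, and the pipeline name receives a random suffix.
nb_metadata = {
    "experiment_name": "my-experiment",
    "pipeline_name": "my-pipeline",
}
metadata = parse_metadata(nb_metadata)
print(metadata["pipeline_name"])  # e.g. "my-pipeline-a1b2c"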
def run_pipeline(experiment_name: str, pipeline_id: str, run_name: str = None,
                 version_id: str = None, host: str = None, **kwargs) -> Any:
    """Run a pipeline (without uploading it) in KFP.

    Args:
        experiment_name: The name of the KFP experiment
        pipeline_id: The ID of the uploaded pipeline to be run
        run_name: The name of the KFP run (autogenerated if not provided)
        version_id: The ID of the pipeline version to be run
        host: Custom host when executing outside of the cluster

    Returns:
        Pipeline run metadata
    """
    client = _get_kfp_client(host)
    log.info("Creating KFP experiment '%s'...", experiment_name)
    experiment = client.create_experiment(experiment_name)
    pipeline = client.pipelines.get_pipeline(pipeline_id)
    pipeline_name = pipeline.name
    _version_id = version_id if version_id else pipeline.default_version.id
    version_name = client.pipelines.get_pipeline_version(_version_id).name
    if not run_name:
        run_name = "%s-%s-%s" % (pipeline_name, version_name,
                                 utils.random_string())
    display_version = ("(%sversion: '%s')"
                       % ("" if version_id else "default ", version_name))
    log.info("Submitting new pipeline run '%s' for pipeline '%s' %s ...",
             run_name, pipeline_name, display_version)
    run = client.run_pipeline(experiment.id, run_name,
                              pipeline_id=pipeline_id,
                              version_id=_version_id,
                              params=kwargs)
    run_url = ("%s/?ns=%s#/runs/details/%s"
               % (client._get_url_prefix(), podutils.get_namespace(), run.id))
    log.info("Successfully submitted pipeline run.")
    log.info("Run URL: <host>%s", run_url)
    return run
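# A minimal sketch of run_pipeline(), assuming `pipeline_id` holds the ID of
# a previously uploaded pipeline (e.g. from upload_pipeline() below). Extra
# keyword arguments are forwarded as pipeline parameters, and the default
# pipeline version is used when no version_id is given.
run = run_pipeline(experiment_name="my-experiment",
                   pipeline_id=pipeline_id,
                   epochs=10)  # forwarded to the run via `params=kwargs`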
def upload_pipeline(pipeline_package_path: str, pipeline_name: str,
                    host: str = None) -> Tuple[str, str]:
    """Upload a pipeline package to KFP.

    If a pipeline with the provided name already exists, upload a new
    version.

    Args:
        pipeline_package_path: Path to the .tar.gz KFP pipeline package
        pipeline_name: Name of the uploaded pipeline
        host: Custom host when executing outside of the cluster

    Returns:
        (pipeline_id, version_id)
    """
    client = _get_kfp_client(host)
    log.info("Uploading pipeline '%s'...", pipeline_name)
    pipeline_id = get_pipeline_id(pipeline_name, host=host)
    version_name = utils.random_string()
    if not pipeline_id:
        # The name of a pipeline's first version is set to the pipeline's
        # own name. To work around this, upload the pipeline, then upload
        # a new version with a proper version name and, finally, delete
        # the original (first) version.
        upp = client.pipeline_uploads.upload_pipeline(
            uploadfile=pipeline_package_path, name=pipeline_name)
        pipeline_id = upp.id
        upv = client.pipeline_uploads.upload_pipeline_version(
            uploadfile=pipeline_package_path, name=version_name,
            pipelineid=pipeline_id)
        # delete the first version, which has the same name as the pipeline
        client.pipelines.delete_pipeline_version(upp.default_version.id)
    else:
        upv = client.pipeline_uploads.upload_pipeline_version(
            uploadfile=pipeline_package_path, name=version_name,
            pipelineid=pipeline_id)
    log.info("Successfully uploaded version '%s' for pipeline '%s'.",
             version_name, pipeline_name)
    return pipeline_id, upv.id
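# A minimal sketch of upload_pipeline(), assuming a compiled KFP package at
# a hypothetical path. Uploading under the same name twice creates a new
# version of the existing pipeline rather than a new pipeline.
pipeline_id, version_id = upload_pipeline("/tmp/pipeline.tar.gz",
                                          "my-pipeline")
pipeline_id, version_id = upload_pipeline("/tmp/pipeline-v2.tar.gz",
                                          "my-pipeline")  # new version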
def generate_run_name(pipeline_name: str):
    """Generate a new run name based on pipeline name."""
    return "{}_run-{}".format(pipeline_name, utils.random_string(5))
def _randomize_pipeline_name(self):
    self.pipeline_name = "%s-%s" % (self.pipeline_name,
                                    utils.random_string())
def serve(model: Any,
          name: str = None,
          wait: bool = True,
          predictor: str = None,
          preprocessing_fn: Callable = None,
          preprocessing_assets: Dict = None) -> KFServer:
    """Main API used to serve models from a notebook or a pipeline step.

    This function procedurally deploys a KFServing InferenceService,
    starting from a model object. A summary list of actions follows:

    * Autogenerate an InferenceService name, if not provided
    * Process transformer function (and related assets)
    * Dump the model, to a path under a mounted PVC
    * Snapshot the PVC
    * Hydrate a new PVC from the new snapshot
    * Submit an InferenceService CR
    * Monitor the CR until it becomes ready

    FIXME: Improve documentation. Provide some examples in the docstring
    and explain how the preprocessing function parsing works.

    Args:
        model: Model object to be used as a predictor
        name (optional): Name of the predictor. Will be autogenerated if
            not provided
        wait (optional): Wait for the InferenceService to become ready.
            Default: True
        predictor (optional): Predictor type to be used for the
            InferenceService. If not provided it will be inferred using
            the matching marshalling backends.
        preprocessing_fn (optional): A processing function that will be
            deployed as a KFServing Transformer
        preprocessing_assets (optional): A dictionary with objects required
            by the preprocessing function. This is needed in case the
            preprocessing function references global objects.

    Returns:
        A KFServer instance
    """
    log.info("Starting serve procedure for model '%s'", model)
    if not name:
        name = "%s-%s" % (podutils.get_pod_name(), utils.random_string(5))

    # Validate and process transformer
    if preprocessing_fn:
        _prepare_transformer_assets(preprocessing_fn, preprocessing_assets)

    # Detect predictor type
    predictor_type = marshal.get_backend(model).predictor_type
    if predictor and predictor != predictor_type:
        raise RuntimeError("Trying to create an InferenceService with"
                           " predictor of type '%s' but the model is of"
                           " type '%s'" % (predictor, predictor_type))
    if not predictor_type:
        log.error(
            "Kale does not yet support serving objects with '%s'"
            " backend.\n\nPlease help us improve Kale by opening a new"
            " issue at:\n"
            "https://github.com/kubeflow-kale/kale/issues",
            marshal.get_backend(model).display_name)
        utils.graceful_exit(-1)
    predictor = predictor_type  # in case `predictor` is None

    volume = podutils.get_volume_containing_path(PVC_ROOT)
    volume_name = volume[1].persistent_volume_claim.claim_name
    log.info("Model is contained in volume '%s'", volume_name)

    # Dump the model
    marshal.set_data_dir(PREDICTOR_MODEL_DIR)
    model_filepath = marshal.save(model, "model")
    log.info("Model saved successfully at '%s'", model_filepath)

    # Take snapshot
    task_info = rokutils.snapshot_pvc(volume_name,
                                      bucket=rokutils.SERVING_BUCKET,
                                      wait=True)
    task = rokutils.get_task(task_info["task"]["id"],
                             bucket=rokutils.SERVING_BUCKET)
    new_pvc_name = "%s-pvc-%s" % (name, utils.random_string(5))
    rokutils.hydrate_pvc_from_snapshot(task["result"]["event"]["object"],
                                       task["result"]["event"]["version"],
                                       new_pvc_name,
                                       bucket=rokutils.SERVING_BUCKET)

    # Cleanup: remove dumped model and transformer assets from the
    # current PVC
    utils.rm_r(
        os.path.join(PREDICTOR_MODEL_DIR, os.path.basename(model_filepath)))
    utils.rm_r(TRANSFORMER_ASSETS_DIR, silent=True)

    # Need an absolute path from the *root* of the PVC. Note that
    # str.lstrip() strips a *character set*, not a prefix, so remove the
    # PVC_ROOT prefix explicitly instead.
    pvc_model_path = "/" + os.path.relpath(PREDICTOR_MODEL_DIR, PVC_ROOT)
    # Tensorflow saves the model's files into a directory by itself
    if predictor == "tensorflow":
        pvc_model_path += "/" + os.path.basename(model_filepath).lstrip("/")

    kfserver = create_inference_service(
        name=name,
        predictor=predictor,
        pvc_name=new_pvc_name,
        model_path=pvc_model_path,
        transformer=preprocessing_fn is not None)

    if wait:
        monitor_inference_service(kfserver.name)
    return kfserver
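# A minimal usage sketch for serve(); the sklearn model is illustrative and
# X_train/y_train are assumed to exist. The predictor type is inferred from
# the model's marshalling backend, and with wait=True (the default) the call
# blocks until the InferenceService becomes ready.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression().fit(X_train, y_train)
kfserver = serve(model, name="my-model")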