def update_uimetadata(artifact_name,
                      uimetadata_path='/mlpipeline-ui-metadata.json'):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    # Default empty ui-metadata dict
    outputs = {"outputs": []}
    if os.path.exists(uimetadata_path):
        try:
            with open(uimetadata_path, 'r') as f:
                outputs = json.load(f)
            if not outputs.get('outputs', None):
                outputs['outputs'] = []
        except json.JSONDecodeError as e:
            print("Failed to parse json file {}: {}\n"
                  "This step will not be able to visualize artifacts in the"
                  " KFP UI".format(uimetadata_path, e))

    pod_name = pod_utils.get_pod_name()
    namespace = pod_utils.get_namespace()
    workflow_name = pod_utils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
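# Usage sketch (an assumption, not part of the original module): how a KFP
# step might register an HTML artifact after uploading it. The artifact name
# "confusion_matrix" is hypothetical, and the call only works inside a KFP
# step pod, where the pod_utils lookups above can resolve the pod, namespace
# and workflow names.
if __name__ == "__main__":
    update_uimetadata("confusion_matrix")
    # /mlpipeline-ui-metadata.json now contains a 'web-app' entry pointing
    # at minio://mlpipeline/artifacts/<workflow>/<pod>/confusion_matrix.tgz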
def create_katib_experiment(request, pipeline_id, pipeline_metadata,
                            output_path):
    """Create and launch a new Katib experiment.

    The Katib metadata must include all the information required to create an
    Experiment CRD (algorithm, objective, search parameters, ...). This
    information is sanitized and a new YAML definition is written to file.
    This YAML is then submitted to the K8s API server to create the
    Experiment CR.

    Args:
        request: RPC request object
        pipeline_id: The id of the KFP pipeline that will be run by the Trials
        pipeline_metadata: The Kale notebook metadata
        output_path: The directory to store the YAML definition

    Returns (dict): a dictionary describing the status of the experiment
    """
    try:
        namespace = pod_utils.get_namespace()
    except Exception:
        # XXX: When not running from within a pod, get_namespace() fails
        # XXX: If that's the case, use the 'kubeflow-user' one
        # XXX: This should probably change. It works for local/MiniKF dev
        namespace = "kubeflow-user"

    katib_name = pipeline_metadata.get("experiment_name")
    katib_spec = pipeline_metadata.get("katib_metadata", None)
    if not katib_spec:
        raise RPCNotFoundError(details=("Could not find Katib specification"
                                        " in notebook's metadata"),
                               trans_id=request.trans_id)
    # Perform a sanitization of the Katib specification, making sure all the
    # required first-layer-fields are set
    katib_spec = _sanitize_katib_spec(request, katib_spec)

    trial_parameters = {
        "image": "gcr.io/arrikto-playground/elikatsis/kale/trials:1b82d32",
        "pipeline_id": pipeline_id,
        "experiment_name": pipeline_metadata.get("experiment_name")
    }

    katib_experiment = _define_katib_experiment(katib_name, katib_spec,
                                                trial_parameters)
    definition_path = os.path.abspath(
        os.path.join(output_path, "%s.katib.yaml" % katib_name))
    request.log.info("Saving Katib experiment definition at %s",
                     definition_path)
    import yaml
    with open(definition_path, "w") as yaml_file:
        yaml_text = yaml.dump(katib_experiment)
        yaml_file.write(yaml_text)

    _launch_katib_experiment(request, katib_experiment, namespace)

    return {
        "name": katib_experiment["metadata"]["name"],
        "namespace": namespace,
        "status": None,
        "trials": 0,
        "maxTrialCount": katib_experiment["spec"]["maxTrialCount"]
    }
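# Illustrative notebook metadata for create_katib_experiment(); every value
# here is an assumption, shaped after the fields a Katib Experiment CRD
# expects (algorithm, objective, search parameters, trial budget). Only the
# "experiment_name" and "katib_metadata" keys are read by the code above.
example_pipeline_metadata = {
    "experiment_name": "mnist-tuning",
    "katib_metadata": {
        "algorithm": {"algorithmName": "grid"},
        "objective": {"type": "maximize",
                      "objectiveMetricName": "accuracy"},
        "parameters": [{"name": "lr",
                        "parameterType": "double",
                        "feasibleSpace": {"min": "0.01", "max": "0.1"}}],
        "maxTrialCount": 12,
        "parallelTrialCount": 3,
    },
}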
def __init__(self):
    self.store = self._connect()
    self.pod_name = pod_utils.get_pod_name()
    self.pod_namespace = pod_utils.get_namespace()
    self.pod = pod_utils.get_pod(self.pod_name, self.pod_namespace)
    self.workflow_name = self.pod.metadata.labels.get(
        ARGO_WORKFLOW_LABEL_KEY)
    self.workflow = pod_utils.get_workflow(self.workflow_name,
                                           self.pod_namespace)

    workflow_labels = self.workflow["metadata"].get("labels", {})
    self.run_uuid = workflow_labels.get(pod_utils.KFP_RUN_ID_LABEL_KEY,
                                        self.workflow_name)

    workflow_annotations = self.workflow["metadata"].get("annotations", {})
    # Default to an empty JSON object: json.loads() requires a string, so a
    # dict default would raise when the annotation is missing
    pipeline_spec = json.loads(
        workflow_annotations.get("pipelines.kubeflow.org/pipeline_spec",
                                 "{}"))
    self.pipeline_name = pipeline_spec.get("name", self.workflow_name)

    self.component_id = pod_utils.compute_component_id(self.pod)
    self.execution_hash = self.pod.metadata.annotations.get(
        MLMD_EXECUTION_HASH_PROPERTY_KEY, utils.random_string(10))

    self.run_context = self._get_or_create_run_context()
    self.execution = self._create_execution_in_run_context()
    self._label_with_context_and_execution()
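# Standalone sketch of the fallback logic above, using a fabricated workflow
# dict (the real code fetches it from the K8s API via pod_utils). The
# literal "pipeline/runid" key stands in for pod_utils.KFP_RUN_ID_LABEL_KEY
# and is an assumption: when the label or annotation is missing, both the
# run UUID and the pipeline name fall back to the workflow name.
import json

workflow = {"metadata": {"name": "my-wf-abc12",
                         "labels": {},          # no KFP run-id label
                         "annotations": {}}}    # no pipeline_spec
meta = workflow["metadata"]
run_uuid = meta["labels"].get("pipeline/runid", meta["name"])
pipeline_spec = json.loads(
    meta["annotations"].get("pipelines.kubeflow.org/pipeline_spec", "{}"))
pipeline_name = pipeline_spec.get("name", meta["name"])
assert run_uuid == pipeline_name == "my-wf-abc12"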
def create_cloned_volumes(self, volumes):
    """Use Rok to take snapshots of volumes.

    Args:
        volumes (list): a list of volumes

    Returns:
        The same list of volumes, with 'clone' volumes replaced by 'new_pvc'
        ones carrying the corresponding 'rok/origin' annotation
    """
    if not any(v['type'] == 'clone' for v in volumes):
        return volumes

    # FIXME: Make sure the bucket exists
    bucket_name = "notebooks"
    hostname = os.getenv("HOSTNAME")
    # FIXME: Import the Rok client instead of spawning external commands
    namespace = get_namespace()
    commit_title = "Snapshot of notebook {}".format(hostname)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(hostname,
                                                             namespace)
    output_cmd = (
        "rok-gw -o json object-register jupyter" +
        " '{}' '{}' --no-interactive".format(bucket_name, hostname) +
        " --param namespace='{}'".format(namespace) +
        " --param commit_title='{}'".format(commit_title) +
        " --param commit_message='{}'".format(commit_message))
    output = self.run_cmd(output_cmd)
    output = json.loads(output)
    snapshot_volumes = output['result']['version']['group_members']

    # Retrieve the mount point of each snapshotted volume
    for v in snapshot_volumes:
        obj_name = v["object_name"]
        version_name = v["version_name"]
        output_cmd = (
            "rok-gw -o json object-show '{}'".format(bucket_name) +
            " '{}' --version '{}'".format(obj_name, version_name) +
            " --detail")
        output = self.run_cmd(output_cmd)
        v["mount_point"] = json.loads(output)["metadata"]["mountpoint"]

    _volumes = []
    for volume in volumes or []:
        if volume['type'] == 'clone':
            volume = self._get_cloned_volume(volume, snapshot_volumes)
        _volumes.append(volume)

    return _volumes
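# Assumed (illustrative) shape of the rok-gw JSON replies parsed above; only
# the keys the code actually accesses are shown, and all values are made up.
example_register_reply = {
    "result": {"version": {"group_members": [
        {"object_name": "workspace-vol", "version_name": "v1"},
    ]}},
}
example_show_reply = {"metadata": {"mountpoint": "/home/jovyan"}}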
def snapshot_notebook(bucket=DEFAULT_BUCKET, obj=None):
    """Take a Rok snapshot of the current notebook."""
    rok = _get_client()
    hostname = os.getenv("HOSTNAME")
    namespace = pod_utils.get_namespace()
    commit_title = f"Snapshot of notebook {hostname}"
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(hostname,
                                                             namespace)
    params = {"namespace": namespace,
              "commit_title": commit_title,
              "commit_message": commit_message}

    obj = obj or pod_utils.get_pod_name()
    # Create the bucket in case it does not exist
    pod_utils.create_rok_bucket(bucket, client=rok)
    return rok.version_register(bucket, obj, "jupyter", params)
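# Usage sketch (assumes an in-cluster notebook pod with the Rok Gateway
# client available). The object name defaults to the pod name; the explicit
# obj argument below is hypothetical and just illustrates the parameter.
result = snapshot_notebook()                   # snapshot this notebook pod
result = snapshot_notebook(obj="my-notebook")  # or name the object yourself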
def check_rok_availability(request):
    """Check if Rok is available."""
    log = request.log if hasattr(request, "log") else logger
    try:
        rok = _get_client()
    except ImportError:
        log.exception("Failed to import RokClient")
        raise RPCNotFoundError(details="Rok Gateway Client module not found",
                               trans_id=request.trans_id)
    except Exception:
        log.exception("Failed to initialize RokClient")
        raise RPCServiceUnavailableError(details=("Failed to initialize"
                                                  " RokClient"),
                                         trans_id=request.trans_id)

    try:
        rok.account_info()
    except Exception:
        log.exception("Failed to retrieve account information")
        raise RPCServiceUnavailableError(details="Failed to access Rok",
                                         trans_id=request.trans_id)

    name = pod_utils.get_pod_name()
    namespace = pod_utils.get_namespace()
    try:
        suggestions = rok.version_register_suggest(DEFAULT_BUCKET, name,
                                                   "jupyter", "params:lab",
                                                   {"namespace": namespace},
                                                   ignore_env=True)
    except Exception as e:
        log.exception("Failed to list lab suggestions")
        message = "%s: %s" % (e.__class__.__name__, e)
        raise RPCServiceUnavailableError(message=message,
                                         details=("Rok cannot list notebooks"
                                                  " in this namespace"),
                                         trans_id=request.trans_id)

    if not any(s["value"] == name for s in suggestions):
        log.error("Could not find notebook '%s' in list of suggestions",
                  name)
        raise RPCNotFoundError(details=("Could not find this notebook in"
                                        " notebooks listed by Rok"),
                               trans_id=request.trans_id)
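# Sketch of how a caller might handle the failure modes this check raises;
# 'request' is a hypothetical RPC request object, and the two error types
# are the ones raised above.
try:
    check_rok_availability(request)
except RPCNotFoundError:
    pass  # Rok client missing, or this notebook is unknown to Rok
except RPCServiceUnavailableError:
    pass  # client init, account info, or suggestion listing failed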
def create_cloned_volumes(self, volumes):
    """Use Rok to take snapshots of the volumes to be cloned."""
    if not any(v['type'] == 'clone' for v in volumes):
        return volumes

    # FIXME: Make sure the bucket exists
    bucket_name = "notebooks"
    hostname = os.getenv("HOSTNAME")
    # FIXME: Import the Rok client instead of spawning external commands
    namespace = get_namespace()
    commit_title = f"Snapshot of notebook {hostname}"
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(hostname,
                                                             namespace)
    output = self.run_cmd(f"rok-gw -o json object-register jupyter"
                          f" '{bucket_name}' '{hostname}' --no-interactive"
                          f" --param namespace='{namespace}'"
                          f" --param commit_title='{commit_title}'"
                          f" --param commit_message='{commit_message}'")
    output = json.loads(output)
    snapshot_volumes = output['result']['version']['group_members']

    # Retrieve the mount point of each snapshotted volume
    for v in snapshot_volumes:
        obj_name = v["object_name"]
        version_name = v["version_name"]
        output = self.run_cmd(f"rok-gw -o json object-show '{bucket_name}'"
                              f" '{obj_name}' --version '{version_name}'"
                              " --detail")
        v["mount_point"] = json.loads(output)["metadata"]["mountpoint"]

    _volumes = []
    for volume in volumes or []:
        if volume['type'] == 'clone':
            volume = self._get_cloned_volume(volume, snapshot_volumes)
        _volumes.append(volume)

    return _volumes
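# Illustrative input for create_cloned_volumes(); the code above only
# inspects the 'type' key, so the other fields are assumptions about what a
# Kale volume spec might carry. Only 'clone' entries are rewritten by
# _get_cloned_volume(); everything else passes through untouched.
example_volumes = [
    {"type": "clone", "name": "workspace", "mount_point": "/home/jovyan"},
    {"type": "pv", "name": "datasets", "mount_point": "/data"},
]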