def create_and_wait_kfp_run(pipeline_id: str, run_name: str,
                            version_id: str = None,
                            experiment_name: str = "Default",
                            namespace: str = "kubeflow", **kwargs):
    """Start a KFP run, cross-annotate it with Katib, wait, return metrics.

    The steps, in order:

    1. Create a KFP run from the given pipeline.
    2. Annotate the Katib trial named ``run_name`` (in the current pod's
       namespace) with the run's UUID. Failures are logged and ignored.
    3. Look up the workflow backing the run, the Katib trial and the
       trial's owner experiment, then annotate the workflow with the
       experiment and trial names and ids. A failure of the annotation
       call itself is logged and ignored; failures of the lookups
       propagate.
    4. Wait for the run to finish. If its status is not ``"Succeeded"``,
       log a warning and terminate the process with ``sys.exit(-1)``.
    5. Otherwise fetch the run's metrics, log each one as ``name=value``
       and return them.

    Args:
        pipeline_id: KFP pipeline
        run_name: Name of the new run; also used as the name of the Katib
            trial to look up and annotate
        version_id: KFP pipeline's version (optional, not supported yet)
        experiment_name: KFP experiment to create run in.
            (default: "Default")
        namespace: Namespace of KFP deployment
        kwargs: All the parameters the pipeline will be fed with

    Returns:
        metrics: Dict of metrics along with their values

    Raises:
        SystemExit: If the run finishes with a status other than
            ``"Succeeded"``.
    """
    log = _get_logger()
    pod_namespace = podutils.get_namespace()

    run_id = _create_kfp_run(pipeline_id, run_name, version_id,
                             experiment_name, namespace, **kwargs)

    # --- Trial -> Run link (best effort) ---------------------------------
    log.info("Annotating Trial '%s' with the KFP Run UUID '%s'...",
             run_name, run_id)
    try:
        # Katib Trial name == KFP Run name by design (see rpc.katib)
        katibutils.annotate_trial(run_name, pod_namespace,
                                  {KALE_KATIB_KFP_ANNOTATION: run_id})
    except Exception:
        log.exception(
            "Failed to annotate Trial '%s' with the KFP Run UUID '%s'",
            run_name, run_id)

    # --- Collect the identifiers needed for the Workflow annotations -----
    log.info("Getting Workflow name for run '%s'...", run_id)
    kfp_run = get_run(run_id)
    workflow = _get_workflow_from_run(kfp_run)
    workflow_name = workflow["metadata"]["name"]
    log.info("Workflow name: %s", workflow_name)

    log.info("Getting the Katib trial...")
    trial = katibutils.get_trial(run_name, pod_namespace)
    trial_name = trial["metadata"]["name"]
    trial_uid = trial["metadata"]["uid"]
    log.info("Trial name: %s, UID: %s", trial_name, trial_uid)

    log.info("Getting owner Katib experiment of trial...")
    exp_name, exp_id = katibutils.get_owner_experiment_from_trial(trial)
    log.info("Experiment name: %s, UID: %s", exp_name, exp_id)

    # --- Workflow -> Experiment/Trial link (best effort) -----------------
    wf_annotations = {
        katibutils.EXPERIMENT_NAME_ANNOTATION_KEY: exp_name,
        katibutils.EXPERIMENT_ID_ANNOTATION_KEY: exp_id,
        katibutils.TRIAL_NAME_ANNOTATION_KEY: trial_name,
        katibutils.TRIAL_ID_ANNOTATION_KEY: trial_uid,
    }
    try:
        workflowutils.annotate_workflow(workflow_name, pod_namespace,
                                        wf_annotations)
    except Exception:
        log.exception(
            "Failed to annotate Workflow '%s' with the Katib details",
            workflow_name)

    # --- Wait for the run and report --------------------------------------
    status = _wait_kfp_run(run_id)
    if status == "Succeeded":
        run_metrics = _get_kfp_run_metrics(run_id, namespace)
        for metric_name, metric_value in run_metrics.items():
            log.info("%s=%s", metric_name, metric_value)
        return run_metrics

    # The run did not succeed: there are no metrics to return, so stop the
    # process with an error exit status.
    log.warning("KFP run did not run successfully. "
                "No metrics to return.")
    sys.exit(-1)
def create_and_wait_kfp_run(pipeline_id: str, version_id: str,
                            run_name: str,
                            experiment_name: str = "Default",
                            api_version: str = KATIB_API_VERSION_V1BETA1,
                            **kwargs):
    """Start a KFP run, cross-annotate it with Katib, wait, return metrics.

    Create a KFP run from a KFP pipeline with custom arguments and wait
    for it to finish. If it succeeds, return its metrics, logging each of
    them as ``name=value``, a format that can be parsed by Katib's metrics
    collector.

    Along the way, annotate the parent trial with the UUID of the KFP run,
    and annotate the KFP workflow with the Katib experiment and trial
    names and ids. Both annotation calls are best effort: failures are
    logged and do not abort the function. The lookups of the run, the
    trial and its owner experiment are not guarded, so their failures
    propagate.

    If the run finishes with a status other than ``"Succeeded"``, a
    warning is logged and the process is terminated with
    ``sys.exit(-1)``.

    Args:
        pipeline_id: KFP pipeline
        version_id: KFP pipeline's version
        run_name: The name of the new run; also used as the name of the
            Katib trial to look up and annotate
        experiment_name: KFP experiment to create run in.
            (default: "Default")
        api_version: The version of the Katib CRD (`v1alpha3` or
            `v1beta1`)
        kwargs: All the parameters the pipeline will be fed with

    Returns:
        metrics: Dict of metrics along with their values

    Raises:
        SystemExit: If the run finishes with a status other than
            ``"Succeeded"``.
    """
    pod_namespace = podutils.get_namespace()

    kfp_run = kfputils.run_pipeline(experiment_name=experiment_name,
                                    pipeline_id=pipeline_id,
                                    version_id=version_id,
                                    run_name=run_name,
                                    **kwargs)
    run_id = kfp_run.id

    # --- Trial -> Run link (best effort) ---------------------------------
    log.info("Annotating Trial '%s' with the KFP Run UUID '%s'...",
             run_name, run_id)
    try:
        # Katib Trial name == KFP Run name by design (see rpc.katib)
        annotate_trial(run_name, pod_namespace,
                       {KALE_KATIB_KFP_ANNOTATION_KEY: run_id},
                       api_version)
    except Exception:
        log.exception(
            "Failed to annotate Trial '%s' with the KFP Run UUID '%s'",
            run_name, run_id)

    # --- Collect the identifiers needed for the Workflow annotations -----
    log.info("Getting Workflow name for run '%s'...", run_id)
    fetched_run = kfputils.get_run(run_id)
    workflow = kfputils.get_workflow_from_run(fetched_run)
    workflow_name = workflow["metadata"]["name"]
    log.info("Workflow name: %s", workflow_name)

    log.info("Getting the Katib trial...")
    trial = get_trial(run_name, pod_namespace, api_version)
    trial_name = trial["metadata"]["name"]
    trial_uid = trial["metadata"]["uid"]
    log.info("Trial name: %s, UID: %s", trial_name, trial_uid)

    log.info("Getting owner Katib experiment of trial...")
    exp_name, exp_id = get_owner_experiment_from_trial(trial)
    log.info("Experiment name: %s, UID: %s", exp_name, exp_id)

    # --- Workflow -> Experiment/Trial link (best effort) -----------------
    wf_annotations = {
        EXPERIMENT_NAME_ANNOTATION_KEY: exp_name,
        EXPERIMENT_ID_ANNOTATION_KEY: exp_id,
        TRIAL_NAME_ANNOTATION_KEY: trial_name,
        TRIAL_ID_ANNOTATION_KEY: trial_uid,
    }
    try:
        workflowutils.annotate_workflow(workflow_name, pod_namespace,
                                        wf_annotations)
    except Exception:
        log.exception(
            "Failed to annotate Workflow '%s' with the Katib details",
            workflow_name)

    # --- Wait for the run and report --------------------------------------
    status = kfputils.wait_kfp_run(run_id)
    if status == "Succeeded":
        run_metrics = kfputils.get_kfp_run_metrics(run_id)
        for metric_name, metric_value in run_metrics.items():
            log.info("%s=%s", metric_name, metric_value)
        return run_metrics

    # The run did not succeed: there are no metrics to return, so stop the
    # process with an error exit status.
    log.warning("KFP run did not run successfully. "
                "No metrics to return.")
    sys.exit(-1)