def __init__(self, trans_id=None, nb_path=None):
    if not trans_id:
        trans_id = random_string(size=10)
    self.log = create_adapter(logging.getLogger(__name__), trans_id, nb_path)
    self.trans_id = trans_id
    self.nb_path = nb_path
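# Usage sketch (hedged): the enclosing class is not shown in this excerpt, so
# `NotebookProcessor` below is a hypothetical name. The point is that every
# instance gets a transaction id and a logger adapter that tags records with it.
#
#     proc = NotebookProcessor(nb_path="notebooks/example.ipynb")
#     proc.log.info("starting")   # record carries proc.trans_id and the nb_path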
def parse_metadata(notebook_metadata):
    """Parse the Notebook's metadata and update it when needed.

    Args:
        notebook_metadata (dict): metadata annotated by Kale.
            Refer to DEFAULT_METADATA for defaults

    Returns (dict): updated and validated metadata
    """
    # check for required fields before adding all possible defaults
    validated_notebook_metadata = copy.deepcopy(notebook_metadata)
    for required in METADATA_REQUIRED_KEYS:
        if required not in validated_notebook_metadata:
            raise ValueError(
                "Key {} not found. Add this field either on"
                " the notebook metadata or as an override".format(required))

    metadata = copy.deepcopy(DEFAULT_METADATA)
    metadata.update(validated_notebook_metadata)

    if not re.match(KALE_STEP_NAME_REGEX, metadata['pipeline_name']):
        raise ValueError("Pipeline name {}".format(KALE_NAME_MSG))

    # update the pipeline name with a random string
    random_pipeline_name = "{}-{}".format(metadata['pipeline_name'],
                                          random_string())
    metadata['pipeline_name'] = random_pipeline_name

    volumes = metadata.get('volumes', [])
    if isinstance(volumes, list):
        metadata.update({'volumes': _parse_volumes_metadata(volumes)})
    else:
        raise ValueError("Volumes spec must be a list")

    return metadata
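# Minimal usage sketch for parse_metadata (hedged): the authoritative required
# keys and name format come from METADATA_REQUIRED_KEYS and KALE_STEP_NAME_REGEX,
# which are not shown here; the keys below are assumptions for illustration only.
#
#     notebook_metadata = {
#         "experiment_name": "demo-experiment",
#         "pipeline_name": "demo-pipeline",
#         "volumes": [],
#     }
#     metadata = parse_metadata(notebook_metadata)
#     # metadata now carries every DEFAULT_METADATA key, parsed volume specs,
#     # and a pipeline_name suffixed with a random string, e.g.
#     # "demo-pipeline-a1b2c3d4"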
def __init__(self):
    self.store = self._connect()

    self.pod_name = pod_utils.get_pod_name()
    self.pod_namespace = pod_utils.get_namespace()
    self.pod = pod_utils.get_pod(self.pod_name, self.pod_namespace)
    self.workflow_name = self.pod.metadata.labels.get(
        ARGO_WORKFLOW_LABEL_KEY)
    self.workflow = pod_utils.get_workflow(self.workflow_name,
                                           self.pod_namespace)

    workflow_labels = self.workflow["metadata"].get("labels", {})
    self.run_uuid = workflow_labels.get(pod_utils.KFP_RUN_ID_LABEL_KEY,
                                        self.workflow_name)

    workflow_annotations = self.workflow["metadata"].get("annotations", {})
    # json.loads() expects a string, so fall back to an empty JSON object
    # ("{}") when the annotation is missing
    pipeline_spec = json.loads(workflow_annotations.get(
        "pipelines.kubeflow.org/pipeline_spec", "{}"))
    self.pipeline_name = pipeline_spec.get("name", self.workflow_name)

    self.component_id = pod_utils.compute_component_id(self.pod)
    self.execution_hash = self.pod.metadata.annotations.get(
        MLMD_EXECUTION_HASH_PROPERTY_KEY, utils.random_string(10))

    self.run_context = self._get_or_create_run_context()
    self.execution = self._create_execution_in_run_context()
    self._label_with_context_and_execution()
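# For reference (hedged sketch): this constructor expects to run inside a KFP
# step pod, where the Argo workflow typically carries an annotation such as
#
#     pipelines.kubeflow.org/pipeline_spec: '{"name": "my-pipeline", ...}'
#
# When the annotation is missing, pipeline_name falls back to the workflow
# name; run_uuid likewise falls back to it if the KFP run ID label is absent.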
def save_pipeline(self, pipeline_code, output_path=None):
    if output_path is None:
        # create tmp path
        tmp_dir = tempfile.mkdtemp()
        filename = "kale_pipeline_code_{}.py".format(
            utils.random_string(5))
        output_path = os.path.join(tmp_dir, filename)
    with open(output_path, "w") as f:
        f.write(pipeline_code)
    self.logger.info("Pipeline code saved at {}".format(output_path))
    return output_path
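# Usage sketch (hedged): `compiler` stands for an instance of the enclosing
# class, whose name is not shown in this excerpt.
#
#     path = compiler.save_pipeline(code)                      # tmp dir, random filename
#     path = compiler.save_pipeline(code, "/tmp/pipeline.py")  # explicit location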
def generate_run_name(pipeline_name: str):
    """Generate a new run name based on pipeline name."""
    return "{}_run-{}".format(pipeline_name, utils.random_string(5))
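# Example of the produced format, assuming utils.random_string(5) yields five
# random alphanumeric characters:
#
#     generate_run_name("my-pipeline")  ->  "my-pipeline_run-x7k2q"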