def scheduler(status: kopf.Status, patch: kopf.Patch, logger: kopf.Logger, **_: Any) -> str:
    replication = status.get("replication", {})
    replication["codeBuildStatus"] = None
    replication["codeBuildPhase"] = None
    replication["codeBuildId"] = None

    attempt = replication.get("attempt", 0) + 1
    if attempt > CONFIG["max_replication_attempts"]:
        replication["replicationStatus"] = "MaxAttemptsExceeded"
        replication["attempt"] = attempt
        patch["status"] = {"replication": replication}
    else:
        with LOCK:
            global WORKERS_IN_PROCESS
            logger.debug("WORKERS_IN_PROCESS: %s", WORKERS_IN_PROCESS)
            if WORKERS_IN_PROCESS < CONFIG["workers"]:
                WORKERS_IN_PROCESS += 1
                replication["replicationStatus"] = "Scheduled"
                replication["attempt"] = attempt
                patch["status"] = {"replication": replication}
                logger.info("Schedule Attempt: %s", replication["attempt"])

    return cast(str, replication["replicationStatus"])
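# The scheduler above relies on module-level state defined elsewhere in this module.
# A minimal sketch of what that state is assumed to look like (the names CONFIG, LOCK,
# and WORKERS_IN_PROCESS come from the handler body; the concrete values below are
# illustrative assumptions only):
#
#   import threading
#
#   LOCK = threading.Lock()      # guards WORKERS_IN_PROCESS across concurrent handlers
#   WORKERS_IN_PROCESS = 0       # number of replications currently scheduled
#   CONFIG = {
#       "workers": 2,                    # maximum concurrent replications
#       "max_replication_attempts": 3,   # give up after this many attempts
#   }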
def delete_poddefaults_from_user_namespaces(
    poddefaults: List[Dict[str, Any]],
    user_namespaces: List[str],
    client: DynamicClient,
    logger: kopf.Logger,
) -> None:
    logger.debug(
        "Deleting PodDefaults %s from user Namespaces %s",
        [pd["metadata"]["name"] for pd in poddefaults],
        user_namespaces,
    )
    for poddefault in poddefaults:
        for namespace in user_namespaces:
            try:
                delete_poddefault(
                    namespace=namespace,
                    name=poddefault["metadata"]["name"],
                    client=client,
                    logger=logger,
                )
            except Exception as e:
                logger.warning(
                    "Unable to delete PodDefault %s from Namespace %s: %s",
                    poddefault["metadata"]["name"],
                    namespace,
                    str(e),
                )
def delete_userspace(namespace: str, name: str, client: dynamic.DynamicClient, logger: kopf.Logger) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="UserSpace")
    api.delete(namespace=namespace, name=name, body={})
    logger.debug("Deleted UserSpace: %s in Namespace: %s", name, namespace)
def modify_poddefault(
    namespace: str,
    name: str,
    desc: str,
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    patch = {"spec": {"desc": desc}}
    api.patch(namespace=namespace, name=name, body=patch)
    logger.debug("Modified PodDefault: %s in Namespace: %s", name, namespace)
def modify_userspace(
    namespace: str,
    name: str,
    desc: str,
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="UserSpace")
    patch = {"spec": {"desc": desc}}
    api.patch(namespace=namespace, name=name, body=patch)
    logger.debug("Modified UserSpace: %s in Namespace: %s", name, namespace)
def create_poddefault(
    namespace: str,
    poddefault: Dict[str, Any],
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    api.create(namespace=namespace, body=poddefault)
    logger.debug(
        "Created PodDefault: %s in Namespace: %s",
        poddefault["metadata"]["name"],
        namespace,
    )
def _uninstall_chart(helm_release: str, namespace: str, logger: kopf.Logger) -> bool:
    install_status = True
    cmd = f"/usr/local/bin/helm uninstall --debug --namespace {namespace} {helm_release}"
    try:
        logger.debug("running uninstall cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished uninstall cmd: %s", cmd)
    except Exception:
        logger.error("errored cmd: %s", cmd)
        install_status = False
    return install_status
def _get_team_context(team: str, logger: kopf.Logger) -> Dict[str, Any]:
    try:
        api_instance = CoreV1Api()
        team_context_cf: V1ConfigMap = api_instance.read_namespaced_config_map("orbit-team-context", team)
        team_context_str = team_context_cf.data["team"]
        logger.debug("team context: %s", team_context_str)
        team_context: Dict[str, Any] = json.loads(team_context_str)
        logger.debug("team context keys: %s", team_context.keys())
    except Exception as e:
        logger.error("Error fetching team context ConfigMap")
        raise e
    return team_context
def rescheduler(status: kopf.Status, patch: kopf.Patch, logger: kopf.Logger, **_: Any) -> str:
    logger.debug("Rescheduling")
    replication = status.get("replication", {})

    failure_delay = replication.get("failureDelay", 0)
    if failure_delay > 0:
        replication["failureDelay"] = failure_delay - 5
    else:
        replication["replicationStatus"] = "Pending"
        replication["failureDelay"] = None

    patch["status"] = {"replication": replication}
    return "Rescheduled"
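# A hedged sketch of how scheduler/rescheduler above are presumably registered with
# kopf (the decorator choice, resource names, intervals, and filter values are
# assumptions, not taken from this module):
#
#   @kopf.on.field(ORBIT_API_GROUP, ORBIT_API_VERSION, "imagereplications",
#                  field="status.replication.replicationStatus", value="Pending")
#   def scheduler(status, patch, logger, **_): ...
#
#   @kopf.timer("imagereplications", interval=5,
#               field="status.replication.replicationStatus", value="Failed")
#   def rescheduler(status, patch, logger, **_): ...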
def orbit_job_monitor(
    namespace: str,
    name: str,
    patch: kopf.Patch,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    jobs_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> Any:
    ns: Optional[Dict[str, Any]] = None
    k8s_job: Optional[Dict[str, Any]] = None

    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {"jobStatus": "JobDetailsNotFound", "error": "No Namespace resource found"}
        }
        return "JobDetailsNotFound"

    for k8s_job in jobs_idx.get((namespace, name), []):
        logger.debug("k8s_job: %s", k8s_job)

    if k8s_job is None:  # To tackle the race condition caused by Timer
        return "JobMetadataNotFound"

    if k8s_job.get("status", {}).get("active") == 1:
        job_status = "Active"
    else:
        job_status = k8s_job.get("status", {}).get("conditions", [{}])[0].get("type")

    k8s_job_reason = k8s_job.get("status", {}).get("conditions", [{}])[0].get("status")
    k8s_job_message = k8s_job.get("status", {}).get("conditions", [{}])[0].get("message")

    patch["status"] = {
        "orbitJobOperator": {
            "jobStatus": job_status,
            "jobName": k8s_job.get("name"),
            "k8sJobReason": k8s_job_reason,
            "k8sJobMessage": k8s_job_message,
        }
    }
    return job_status
def create_userspace(
    namespace: str,
    userspace: Dict[str, Any],
    client: dynamic.DynamicClient,
    logger: kopf.Logger,
) -> None:
    api = client.resources.get(api_version=ORBIT_API_VERSION, group=ORBIT_API_GROUP, kind="UserSpace")
    api.create(namespace=namespace, body=userspace)
    logger.debug(
        "Created UserSpace: %s in Namespace: %s",
        userspace["metadata"]["name"],
        namespace,
    )
def copy_poddefaults_to_user_namespaces(
    poddefaults: List[Dict[str, Any]],
    user_namespaces: List[str],
    client: DynamicClient,
    logger: kopf.Logger,
) -> None:
    logger.debug(
        "Copying PodDefaults %s to user Namespaces %s",
        [pd["metadata"]["name"] for pd in poddefaults],
        user_namespaces,
    )
    for poddefault in poddefaults:
        for namespace in user_namespaces:
            try:
                kwargs = {
                    "name": poddefault["metadata"]["name"],
                    "desc": poddefault["spec"]["desc"],
                    "labels": {
                        "orbit/space": "user",
                        "orbit/team": poddefault["metadata"]["labels"].get("orbit/team", None),
                    },
                }
                create_poddefault(
                    namespace=namespace,
                    poddefault=construct(**kwargs),
                    client=client,
                    logger=logger,
                )
            except ApiException as e:
                logger.warning(
                    "Unable to create PodDefault %s in Namespace %s: %s",
                    poddefault["metadata"]["name"],
                    namespace,
                    str(e.body),
                )
            except Exception as e:
                logger.warning("Failed to create PodDefault: %s", str(e))
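# For reference, construct(name=..., desc=..., labels=...) above is assumed to build a
# Kubeflow PodDefault body roughly like the following (the exact fields emitted by
# construct are an assumption; apiVersion/kind follow the upstream Kubeflow PodDefault CRD):
#
#   {
#       "apiVersion": f"{KUBEFLOW_API_GROUP}/{KUBEFLOW_API_VERSION}",
#       "kind": "PodDefault",
#       "metadata": {"name": name, "labels": labels},
#       "spec": {"selector": {"matchLabels": {name: "true"}}, "desc": desc},
#   }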
def update_pod_images(
    spec: kopf.Spec,
    patch: kopf.Patch,
    dryrun: bool,
    logger: kopf.Logger,
    imagereplications_idx: kopf.Index[str, str],
    **_: Any,
) -> kopf.Patch:
    if dryrun:
        logger.debug("DryRun - Skip Pod Mutation")
        return patch

    annotations = {}
    init_containers: List[Dict[str, Any]] = []
    containers: List[Dict[str, Any]] = []
    replications = {}

    def process_containers(
        src_containers: Optional[List[Dict[str, Any]]], dest_containers: List[Dict[str, Any]]
    ) -> None:
        for container in src_containers if src_containers else []:
            image = container.get("image", "")
            desired_image = imagereplication_utils.get_desired_image(image=image, config=CONFIG)
            if image != desired_image:
                container_copy = deepcopy(container)
                container_copy["image"] = desired_image
                dest_containers.append(container_copy)
                replications[image] = desired_image
                # "~1" is the JSON Pointer escape for "/" in the annotation key
                # (the admission response is applied as a JSON Patch)
                annotations[f"original-container-image~1{container['name']}"] = image

    process_containers(spec.get("initContainers", []), init_containers)
    process_containers(spec.get("containers", []), containers)

    if replications:
        client = dynamic_client()
        for source, destination in replications.items():
            if not imagereplications_idx.get(destination, []):
                imagereplication_utils.create_imagereplication(
                    namespace="orbit-system",
                    source=source,
                    destination=destination,
                    client=client,
                    logger=logger,
                )
            else:
                logger.debug("Skipping ImageReplication Creation")

    if annotations:
        patch["metadata"] = {"annotations": annotations}
        patch["spec"] = {}
        if init_containers:
            patch["spec"]["initContainers"] = init_containers
        if containers:
            patch["spec"]["containers"] = containers

    logger.debug("Patch: %s", str(patch))
    return patch
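# Illustrative example of the mutation above (values hypothetical): a container image
# from a public registry is rewritten to the replicated copy that
# imagereplication_utils.get_desired_image is assumed to compute from CONFIG:
#
#   "public.ecr.aws/foo/bar:1.0"
#       -> "<account>.dkr.ecr.<region>.amazonaws.com/<replicated-repo>/foo/bar:1.0"
#
# and the original image is recorded in an annotation whose "/" is pre-escaped as "~1":
#
#   "original-container-image/<container-name>": "public.ecr.aws/foo/bar:1.0"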
def _install_helm_chart(
    helm_release: str,
    namespace: str,
    team: str,
    user: str,
    user_email: str,
    user_efsapid: str,
    repo: str,
    package: str,
    logger: kopf.Logger,
) -> bool:
    install_status = True

    # try to uninstall first
    try:
        cmd = f"helm uninstall --debug {helm_release} -n {team}"
        logger.debug("running cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished cmd: %s", cmd)
    except Exception:
        logger.debug("helm uninstall did not find the release")

    cmd = (
        f"/usr/local/bin/helm upgrade --install --devel --debug --namespace {team} "
        f"{helm_release} {repo}/{package} "
        f"--set user={user},user_email={user_email},namespace={namespace},user_efsapid={user_efsapid}"
    )
    try:
        logger.debug("running cmd: %s", cmd)
        output = run_command(cmd)
        logger.debug(output)
        logger.info("finished cmd: %s", cmd)
    except Exception:
        logger.warning("errored cmd: %s", cmd)
        install_status = False
    return install_status
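# For a hypothetical user "jane" in team "analytics" with userspace namespace
# "analytics-jane" and chart "jupyter", _install_helm_chart builds and runs a command
# along these lines (all values illustrative only):
#
#   /usr/local/bin/helm upgrade --install --devel --debug --namespace analytics \
#       analytics-jane-jupyter analytics--userspace/jupyter \
#       --set user=jane,user_email=jane@example.com,namespace=analytics-jane,user_efsapid=fsap-0123456789abcdef0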
def filter_podsettings(
    podsettings: List[Dict[str, Any]],
    pod_labels: kopf.Labels,
    logger: kopf.Logger,
) -> List[Dict[str, Any]]:
    filtered_podsettings: List[Dict[str, Any]] = []

    def labels_match(labels: kopf.Labels, selector_labels: kopf.Labels) -> bool:
        for key, value in selector_labels.items():
            label_value = labels.get(key, None)
            if label_value != value:
                logger.debug(
                    "NoHit: Label value check, label %s with value %s does not equal %s",
                    key,
                    label_value,
                    value,
                )
                return False
        return True

    def expressions_match(labels: kopf.Labels, selector_expressions: List[Dict[str, Any]]) -> bool:
        for match_expression in selector_expressions:
            pod_label_value = labels.get(match_expression["key"], None)
            operator = match_expression["operator"]
            values = match_expression.get("values", [])

            if operator == "Exists" and pod_label_value is None:
                logger.debug(
                    "NoHit: Exists check, label %s does not exist",
                    match_expression["key"],
                )
                return False
            if operator == "NotExists" and pod_label_value is not None:
                logger.debug(
                    "NoHit: NotExists check, label %s does exist with value %s",
                    match_expression["key"],
                    pod_label_value,
                )
                return False
            if operator == "In" and pod_label_value not in values:
                logger.debug(
                    "NoHit: In check, label %s has value %s which is not in %s",
                    match_expression["key"],
                    pod_label_value,
                    values,
                )
                return False
            if operator == "NotIn" and pod_label_value in values:
                logger.debug(
                    "NoHit: NotIn check, label %s has value %s which is in %s",
                    match_expression["key"],
                    pod_label_value,
                    values,
                )
                return False
        return True

    for podsetting in podsettings:
        selector_labels = podsetting["spec"]["podSelector"].get("matchLabels", {})
        selector_expressions = podsetting["spec"]["podSelector"].get("matchExpressions", [])

        if pod_labels == {}:
            logger.debug("NoHit: Pod contains no labels to match against")
            continue
        elif selector_labels == {} and selector_expressions == []:
            logger.debug(
                "NoHit: PodSetting contains no podSelectors to match against. PodSetting: %s",
                podsetting["name"],
            )
            continue
        elif not labels_match(pod_labels, selector_labels):
            logger.debug(
                "NoHit: Pod labels and PodSetting matchLabels do not match. PodSetting: %s",
                podsetting["name"],
            )
            continue
        elif not expressions_match(pod_labels, selector_expressions):
            logger.debug(
                "NoHit: Pod labels and PodSetting matchExpressions do not match. PodSetting: %s",
                podsetting["name"],
            )
            continue
        else:
            logger.debug(
                "Hit: Pod labels and PodSetting podSelectors match. PodSetting: %s",
                podsetting["name"],
            )
            filtered_podsettings.append(podsetting)

    return filtered_podsettings
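# A small illustrative example (hypothetical data, not from the operator) of how
# filter_podsettings matches podSelector terms against pod labels:
#
#   pod_labels = {"orbit/team": "data-science", "app": "jupyter"}
#   podsettings = [{
#       "name": "gpu-podsetting",
#       "spec": {"podSelector": {
#           "matchLabels": {"app": "jupyter"},
#           "matchExpressions": [{"key": "orbit/team", "operator": "In", "values": ["data-science"]}],
#       }},
#   }]
#   filter_podsettings(podsettings=podsettings, pod_labels=pod_labels, logger=logger)
#   # -> returns the single podsetting above, since both selector terms match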
def delete_poddefault(namespace: str, name: str, client: dynamic.DynamicClient, logger: kopf.Logger) -> None:
    api = client.resources.get(api_version=KUBEFLOW_API_VERSION, group=KUBEFLOW_API_GROUP, kind="PodDefault")
    api.delete(namespace=namespace, name=name, body={})
    logger.debug("Deleted PodDefault: %s in Namespace: %s", name, namespace)
def install_team(
    name: str,
    meta: kopf.Meta,
    spec: kopf.Spec,
    status: kopf.Status,
    patch: kopf.Patch,
    podsettings_idx: kopf.Index[str, Dict[str, Any]],
    logger: kopf.Logger,
    **_: Any,
) -> str:
    logger.debug("loading kubeconfig")
    load_config()

    logger.info("processing userspace cr")
    logger.debug("namespace: %s", name)

    env = spec.get("env", None)
    space = spec.get("space", None)
    team = spec.get("team", None)
    user = spec.get("user", None)
    team_efsid = spec.get("teamEfsId", None)
    user_email = spec.get("userEmail", None)

    logger.debug("new namespace: %s,%s,%s,%s", team, user, user_email, name)

    if not env or not space or not team or not user or not team_efsid or not user_email:
        logger.error(
            (
                "All of env, space, team, user, team_efsid, and user_email are required. "
                "Found: %s, %s, %s, %s, %s, %s"
            ),
            env,
            space,
            team,
            user,
            team_efsid,
            user_email,
        )
        patch["metadata"] = {"annotations": {"orbit/helm-chart-installation": "Skipped"}}
        return "Skipping"

    client = dynamic_client()

    try:
        logger.info(f"Creating EFS endpoint for {team}-{user}...")
        efs_ep_resp = _create_user_efs_endpoint(user=user, team_name=team, team_efsid=team_efsid, env=env)
        access_point_id = efs_ep_resp.get("AccessPointId", "")
        logger.info(f"AccessPointId is {access_point_id}")
    except Exception as e:
        logger.error(f"Error while creating EFS access point for user_name={user} and team={team}: {e}")
        patch["status"] = {
            "userSpaceOperator": {"installationStatus": "Failed to create EFS AccessPoint", "exception": str(e)}
        }
        return "Failed"

    team_context = _get_team_context(team=team, logger=logger)
    logger.info("team context keys: %s", team_context.keys())
    helm_repo_url = team_context["UserHelmRepository"]
    logger.debug("Adding Helm Repository: %s at %s", team, helm_repo_url)
    repo = f"{team}--userspace"

    # add the team repo
    unique_hash = "".join(random.choice(string.ascii_lowercase) for i in range(6))
    run_command(f"helm repo add {repo} {helm_repo_url}")
    try:
        # In isolated envs, we cannot refresh stable, and since we don't use it, we remove it
        run_command("helm repo remove stable")
    except Exception:
        logger.info("Tried to remove stable repo...got an error, but moving on")
    run_command("helm repo update")
    run_command(f"helm search repo --devel {repo} -o json > /tmp/{unique_hash}-charts.json")
    with open(f"/tmp/{unique_hash}-charts.json", "r") as f:
        charts = json.load(f)
    run_command(f"helm list -n {team} -o json > /tmp/{unique_hash}-releases.json")
    with open(f"/tmp/{unique_hash}-releases.json", "r") as f:
        releaseList = json.load(f)
        releases = [r["name"] for r in releaseList]
        logger.info("current installed releases: %s", releases)

    for chart in charts:
        chart_name = chart["name"].split("/")[1]
        helm_release = f"{name}-{chart_name}"
        # Do not install the chart again if it is already installed, as some charts are not upgradable.
        if helm_release not in releases:
            # install the helm package for this user space
            logger.info(f"install the helm package chart_name={chart_name} helm_release={helm_release}")
            install_status = _install_helm_chart(
                helm_release=helm_release,
                namespace=name,
                team=team,
                user=user,
                user_email=user_email,
                user_efsapid=access_point_id,
                repo=repo,
                package=chart_name,
                logger=logger,
            )
            if install_status:
                logger.info("Helm release %s installed at %s", helm_release, name)
                continue
            else:
                patch["status"] = {
                    "userSpaceOperator": {"installationStatus": "Failed to install", "chart_name": chart_name}
                }
                return "Failed"

    logger.info("Copying PodDefaults from Team")
    logger.info("podsettings_idx:%s", podsettings_idx)

    # Construct pseudo poddefaults for each podsetting in the team namespace
    poddefaults = [
        poddefault_utils.construct(
            name=ps["name"],
            desc=ps["spec"].get("desc", ""),
            labels={"orbit/space": "team", "orbit/team": team},
        )
        for ps in podsettings_idx.get(team, [])
    ]
    poddefault_utils.copy_poddefaults_to_user_namespaces(
        client=client, poddefaults=poddefaults, user_namespaces=[name], logger=logger
    )

    patch["metadata"] = {
        "annotations": {"orbit/helm-chart-installation": "Complete"},
        "labels": {"userEfsApId": access_point_id},
    }
    patch["status"] = {"userSpaceOperator": {"installationStatus": "Installed"}}

    return "Installed"
def uninstall_team_charts(
    name: str,
    annotations: kopf.Annotations,
    labels: kopf.Labels,
    spec: kopf.Spec,
    patch: kopf.Patch,
    logger: kopf.Logger,
    meta: kopf.Meta,
    **_: Any,
) -> str:
    logger.debug("loading kubeconfig")
    load_config()

    logger.info("processing removed namespace %s", name)
    space = spec.get("space", None)

    if space == "team":
        logger.info("delete all namespaces that belong to the team %s", name)
        run_command(f"kubectl delete profile -l orbit/team={name}")
        time.sleep(60)
        run_command(f"kubectl delete namespace -l orbit/team={name},orbit/space=user")
        logger.info("all namespaces that belong to the team %s are deleted", name)
    elif space == "user":
        env = spec.get("env", None)
        team = spec.get("team", None)
        user = spec.get("user", None)
        user_email = spec.get("userEmail", None)

        logger.debug("removed namespace: %s,%s,%s,%s", team, user, user_email, name)

        if not env or not space or not team or not user or not user_email:
            logger.error(
                "All of env, space, team, user, and user_email are required. Found: %s, %s, %s, %s, %s",
                env,
                space,
                team,
                user,
                user_email,
            )
            return "Skipping"

        _delete_user_efs_endpoint(user_name=user, user_namespace=f"{team}-{user}", logger=logger, meta=meta)

        team_context = _get_team_context(team=team, logger=logger)
        logger.info("team context keys: %s", team_context.keys())
        helm_repo_url = team_context["UserHelmRepository"]
        repo = f"{team}--userspace"

        # add the team repo
        unique_hash = "".join(random.choice(string.ascii_lowercase) for i in range(6))
        run_command(f"helm repo add {repo} {helm_repo_url}")
        run_command(f"helm search repo --devel {repo} -o json > /tmp/{unique_hash}-charts.json")
        with open(f"/tmp/{unique_hash}-charts.json", "r") as f:
            charts = json.load(f)
        run_command(f"helm list -n {team} -o json > /tmp/{unique_hash}-releases.json")
        with open(f"/tmp/{unique_hash}-releases.json", "r") as f:
            releaseList = json.load(f)
            releases = [r["name"] for r in releaseList]
            logger.info("current installed releases: %s", releases)

        for chart in charts:
            chart_name = chart["name"].split("/")[1]
            helm_release = f"{name}-{chart_name}"

            if helm_release in releases:
                install_status = _uninstall_chart(helm_release=helm_release, namespace=team, logger=logger)

                if install_status:
                    logger.info("Helm release %s uninstalled from %s", helm_release, name)
                    continue
                else:
                    patch["status"] = {
                        "userSpaceOperator": {"installationStatus": "Failed to uninstall", "chart_name": chart_name}
                    }
                    return "Failed"

    patch["status"] = {"userSpaceOperator": {"installationStatus": "Uninstalled"}}
    return "Uninstalled"
def update_pod_images(
    namespace: str,
    labels: kopf.Labels,
    body: kopf.Body,
    patch: kopf.Patch,
    dryrun: bool,
    logger: kopf.Logger,
    warnings: List[str],
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    podsettings_idx: kopf.Index[str, Dict[str, Any]],
    **_: Any,
) -> kopf.Patch:
    if dryrun:
        logger.debug("DryRun - Skip Pod Mutation")
        return patch

    # This is a hack to get the only Namespace from the index Store
    ns: Dict[str, Any] = {}
    for ns in cast(List[Dict[str, Any]], namespaces_idx.get(namespace, [{}])):
        logger.debug("Namespace: %s", ns)

    team = ns.get("labels", {}).get("orbit/team", None)
    if not team:
        logger.info("No 'orbit/team' label found on Pod's Namespace: %s", namespace)
        # warnings.append(f"No 'orbit/team' label found on Pod's Namespace: {namespace}")
        return patch

    team_podsettings: List[Dict[str, Any]] = cast(List[Dict[str, Any]], podsettings_idx.get(team, []))
    if not team_podsettings:
        logger.info("No PodSettings found for Pod's Team: %s", team)
        # warnings.append(f"No PodSettings found for Pod's Team: {team}")
        return patch

    filtered_podsettings = podsetting_utils.filter_podsettings(
        podsettings=team_podsettings, pod_labels=labels, logger=logger
    )
    if not filtered_podsettings:
        logger.info("No PodSetting Selectors matched the Pod")
        return patch

    applied_podsetting_names = []
    body_dict = {
        "metadata": {k: v for k, v in body["metadata"].items()},
        "spec": {k: v for k, v in body["spec"].items()},
    }
    logger.debug("BodyDict: %s", body_dict)

    mutable_body = deepcopy(body)
    for podsetting in filtered_podsettings:
        try:
            podsetting_utils.apply_settings_to_pod(
                namespace=ns, podsetting=podsetting, pod=mutable_body, logger=logger
            )
            applied_podsetting_names.append(podsetting["name"])
        except Exception as e:
            logger.exception("Error applying PodSetting %s: %s", podsetting["name"], str(e))
            warnings.append(f"Error applying PodSetting {podsetting['name']}: {str(e)}")

    if body_dict["spec"] == mutable_body["spec"] and body_dict["metadata"] == mutable_body["metadata"]:
        logger.warning("PodSetting Selectors matched the Pod but no changes were applied")
        warnings.append("PodSetting Selectors matched the Pod but no changes were applied")
        return patch

    patch["metadata"] = {}
    patch["metadata"]["annotations"] = {
        **mutable_body["metadata"].get("annotations", {}),
        **{"orbit/applied-podsettings": ",".join(applied_podsetting_names)},
    }
    # "~1" is the JSON Pointer escape for "/" in annotation and label keys
    patch["metadata"]["annotations"] = {k.replace("/", "~1"): v for k, v in patch["metadata"]["annotations"].items()}

    if "labels" in mutable_body["metadata"]:
        patch["metadata"]["labels"] = {k.replace("/", "~1"): v for k, v in mutable_body["metadata"]["labels"].items()}

    patch["spec"] = mutable_body["spec"]

    logger.info("Applying Patch %s", patch)
    return patch
def orbit_cron_job_monitor(
    namespace: str,
    name: str,
    patch: kopf.Patch,
    status: kopf.Status,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    cron_jobs_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> Any:
    ns: Optional[Dict[str, Any]] = None
    k8s_job: Optional[Dict[str, Any]] = None

    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {"jobStatus": "JobDetailsNotFound", "error": "No Namespace resource found"}
        }
        return "JobDetailsNotFound"

    logger.debug("cron_jobs_idx: %s", cron_jobs_idx)
    for k8s_job in cron_jobs_idx.get((namespace, name), []):
        logger.debug("k8s_job: %s", k8s_job)

    if k8s_job is None:  # To tackle the race condition caused by Timer
        return "JobMetadataNotFound"

    if not k8s_job.get("status", {}):
        cron_job_status = "Activating"
    else:
        cron_job_status = "Active"

    if k8s_job.get("status"):
        for i in k8s_job.get("status", {}).get("active", [{}]):
            if i.get("name") not in status.get("orbitJobOperator", {}).get("cronJobIds", []):
                cron_job_ids: List[str] = status.get("orbitJobOperator", {}).get("cronJobIds", [])
                cron_job_ids.append(i.get("name"))
                patch["status"] = {
                    "orbitJobOperator": {
                        "jobStatus": cron_job_status,
                        "jobName": k8s_job.get("name"),
                        "cronJobIds": cron_job_ids,
                    }
                }
            else:
                return cron_job_status
    else:
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": cron_job_status,
                "jobName": k8s_job.get("name"),
                "cronJobIds": status.get("orbitJobOperator", {}).get("cronJobIds", []),
            }
        }

    return cron_job_status
def create_job(
    namespace: str,
    name: str,
    labels: kopf.Labels,
    annotations: kopf.Annotations,
    spec: kopf.Spec,
    status: kopf.Status,
    patch: kopf.Patch,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    podsettings_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> str:
    ns: Optional[Dict[str, Any]] = None
    for ns in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", ns)

    if ns is None:
        patch["status"] = {
            "orbitJobOperator": {"jobStatus": "JobCreationFailed", "error": "No Namespace resource found"}
        }
        return "JobCreationFailed"

    env = ns["env"]
    team = ns["team"]

    global ENV_CONTEXT  # Caching
    if ENV_CONTEXT is None:
        context = _load_env_context_from_ssm(env)
        if context is None:
            patch["status"] = {
                "orbitJobOperator": {"jobStatus": "JobCreationFailed", "error": "Unable to load Env Context from SSM"}
            }
            return "JobCreationFailed"
        else:
            ENV_CONTEXT = context

    node_type = spec.get("compute", {}).get("nodeType", "fargate")
    labels = {
        "app": "orbit-runner",
        "orbit/node-type": node_type,
        "notebook-name": spec.get("notebookName", ""),
        "orbit/attach-security-group": "yes" if node_type == "ec2" else "no",
    }

    podsetting_metadata: Dict[str, Any] = {}
    for podsetting_metadata in podsettings_idx.get((team, spec.get("compute", {}).get("podSetting", None)), []):
        logger.debug("PodSetting: %s", podsetting_metadata)

    job_spec = job_utils.construct_job_spec(
        env=env,
        team=team,
        env_context=ENV_CONTEXT,
        podsetting_metadata=podsetting_metadata,
        orbit_job_spec=spec,
        labels=labels,
    )
    logger.debug("spec: %s", spec)

    if spec.get("schedule"):
        cronjob_id = f"orbit-{namespace}-{spec.get('triggerName')}"
        cron_job_template: V1beta1JobTemplateSpec = V1beta1JobTemplateSpec(spec=job_spec)
        cron_job_spec: V1beta1CronJobSpec = V1beta1CronJobSpec(
            job_template=cron_job_template, schedule=spec.get("schedule")
        )
        job = V1beta1CronJob(
            api_version="batch/v1beta1",
            kind="CronJob",
            metadata=V1ObjectMeta(
                name=cronjob_id,
                labels={**labels, **spec.get("compute", {}).get("labels", {})},
                namespace=namespace,
            ),
            status=V1beta1CronJobStatus(),
            spec=cron_job_spec,
        )
        kopf.adopt(job, nested="spec.template")
        cron_job_instance: V1beta1CronJob = BatchV1beta1Api().create_namespaced_cron_job(namespace=namespace, body=job)
        cronjob_instance_metadata: V1ObjectMeta = cron_job_instance.metadata
        logger.debug("Started Cron Job: %s", cronjob_instance_metadata.name)

        patch["metadata"] = {"labels": {"k8sJobType": "CronJob"}}
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreated",
                "jobName": cronjob_instance_metadata.name,
                "nodeType": node_type,
            }
        }
        return "CronJobCreated"
    else:
        job = V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=V1ObjectMeta(labels={**labels, **spec.get("compute", {}).get("labels", {})}),
            spec=job_spec,
        )
        kopf.adopt(job, nested="spec.template")
        job_instance: V1Job = BatchV1Api().create_namespaced_job(namespace=namespace, body=job)
        job_instance_metadata: V1ObjectMeta = job_instance.metadata
        logger.debug("Started Job: %s", job_instance_metadata.name)

        patch["metadata"] = {"labels": {"k8sJobType": "Job"}}
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreated",
                "jobName": job_instance_metadata.name,
                "nodeType": node_type,
            }
        }
        return "JobCreated"
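# A hedged sketch of an OrbitJob spec that create_job consumes. The field names are
# inferred from the spec.get(...) calls above; the CRD schema itself is not part of
# this module, so treat the shape and values as illustrative assumptions:
#
#   spec:
#     triggerName: nightly-report
#     schedule: "0 2 * * *"        # omit to create a plain batch/v1 Job instead of a CronJob
#     notebookName: report.ipynb
#     compute:
#       nodeType: ec2              # defaults to "fargate"
#       podSetting: large-memory
#       labels: {costCenter: "1234"}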