def get_pod_spec(name, ssh_key="", container="wshand/cutter:latest"):
    pod = client.V1Pod()
    pod.api_version = "v1"
    labels = {"app": "lawliet-env", "app-specific": get_pod_name(name)}
    pod.metadata = client.V1ObjectMeta(name=get_pod_name(name), labels=labels)
    ports = [
        client.V1ContainerPort(container_port=22),
        client.V1ContainerPort(container_port=6080),
    ]
    container = client.V1Container(
        name=get_pod_name(name),
        image=container,
        image_pull_policy="Always",
        # command=["/bin/bash"],
        # args=["-c", "echo '%s' > ~/.ssh/authorized_keys && service ssh start; "
        #       "mkdir -p /dev/net && mknod /dev/net/tun c 10 200 && "
        #       "chmod 0666 /dev/net/tun; /start.sh" % ssh_key],
        ports=ports,
        security_context=client.V1SecurityContext(
            capabilities=client.V1Capabilities(add=["NET_ADMIN"])))
    context = client.V1PodSecurityContext(sysctls=[
        client.V1Sysctl(name="net.ipv6.conf.all.disable_ipv6", value="0")
    ])
    logging.debug("made context")
    pod.spec = client.V1PodSpec(containers=[container], security_context=context)
    return pod
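# Hedged usage sketch for get_pod_spec: the namespace and username below are
# illustrative assumptions, not values taken from the original code.
from kubernetes import client

client.CoreV1Api().create_namespaced_pod(
    namespace="default",  # hypothetical namespace
    body=get_pod_spec("alice"),
)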
def get_security_context():
    uid = conf.get(SECURITY_CONTEXT_USER)
    gid = conf.get(SECURITY_CONTEXT_GROUP)
    if uid and gid:
        return client.V1PodSecurityContext(fs_group=gid,
                                           run_as_user=uid,
                                           run_as_group=gid)
    return None
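# Minimal sketch of how the optional context slots into a pod spec, assuming
# ``conf``, SECURITY_CONTEXT_USER and SECURITY_CONTEXT_GROUP come from the
# surrounding project; the container name and image are illustrative.
from kubernetes import client

pod_spec = client.V1PodSpec(
    containers=[client.V1Container(name="main", image="busybox:1.36")],
    security_context=get_security_context(),  # None is fine: the field is optional
)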
def _add_krb5_init_container(self, secrets_volume_mount):
    """Add Kerberos init container for a job."""
    krb5_config_map_name = current_app.config["KRB5_CONFIGMAP_NAME"]
    ticket_cache_volume = {"name": "krb5-cache", "emptyDir": {}}
    krb5_config_volume = {
        "name": "krb5-conf",
        "configMap": {"name": krb5_config_map_name},
    }
    volume_mounts = [
        {
            "name": ticket_cache_volume["name"],
            "mountPath": current_app.config["KRB5_TOKEN_CACHE_LOCATION"],
        },
        {
            "name": krb5_config_volume["name"],
            "mountPath": "/etc/krb5.conf",
            "subPath": "krb5.conf",
        },
    ]
    keytab_file = os.environ.get("CERN_KEYTAB")
    cern_user = os.environ.get("CERN_USER")
    krb5_container = {
        "image": current_app.config["KRB5_CONTAINER_IMAGE"],
        "command": [
            "kinit",
            "-kt",
            "/etc/reana/secrets/{}".format(keytab_file),
            "{}@CERN.CH".format(cern_user),
        ],
        "name": current_app.config["KRB5_CONTAINER_NAME"],
        "imagePullPolicy": "IfNotPresent",
        "volumeMounts": [secrets_volume_mount] + volume_mounts,
        # Raw manifest dicts use camelCase keys ("securityContext", not
        # "security_context"); the client serializes the nested object.
        "securityContext": client.V1PodSecurityContext(
            run_as_group=WORKFLOW_RUNTIME_USER_GID, run_as_user=self.kubernetes_uid
        ),
    }
    self.job["spec"]["template"]["spec"]["volumes"].extend(
        [ticket_cache_volume, krb5_config_volume]
    )
    self.job["spec"]["template"]["spec"]["containers"][0]["volumeMounts"].extend(
        volume_mounts
    )
    # Add the Kerberos token cache file location to the job container
    # so every instance of Kerberos picks it up even if it doesn't read
    # the configuration file.
    self.job["spec"]["template"]["spec"]["containers"][0]["env"].append(
        {
            "name": "KRB5CCNAME",
            "value": os.path.join(
                current_app.config["KRB5_TOKEN_CACHE_LOCATION"],
                current_app.config["KRB5_TOKEN_CACHE_FILENAME"].format(
                    self.kubernetes_uid
                ),
            ),
        }
    )
    self.job["spec"]["template"]["spec"]["initContainers"].append(krb5_container)
def _add_krb5_init_container(self, secrets_volume_mount):
    """Add Kerberos init container for a job."""
    krb5_config_map_name = current_app.config['KRB5_CONFIGMAP_NAME']
    ticket_cache_volume = {
        'name': 'krb5-cache',
        'emptyDir': {}
    }
    krb5_config_volume = {
        'name': 'krb5-conf',
        'configMap': {'name': krb5_config_map_name}
    }
    volume_mounts = [
        {
            'name': ticket_cache_volume['name'],
            'mountPath': current_app.config['KRB5_TOKEN_CACHE_LOCATION']
        },
        {
            'name': krb5_config_volume['name'],
            'mountPath': '/etc/krb5.conf',
            'subPath': 'krb5.conf'
        }
    ]
    keytab_file = os.environ.get('CERN_KEYTAB')
    cern_user = os.environ.get('CERN_USER')
    krb5_container = {
        'image': current_app.config['KRB5_CONTAINER_IMAGE'],
        'command': ['kinit', '-kt',
                    '/etc/reana/secrets/{}'.format(keytab_file),
                    '{}@CERN.CH'.format(cern_user)],
        'name': current_app.config['KRB5_CONTAINER_NAME'],
        'imagePullPolicy': 'IfNotPresent',
        'volumeMounts': [secrets_volume_mount] + volume_mounts,
        # Raw manifest dicts use camelCase keys ('securityContext').
        'securityContext': client.V1PodSecurityContext(
            run_as_group=WORKFLOW_RUNTIME_USER_GID,
            run_as_user=self.kubernetes_uid)
    }
    self.job['spec']['template']['spec']['volumes'].extend(
        [ticket_cache_volume, krb5_config_volume])
    self.job['spec']['template']['spec']['containers'][0][
        'volumeMounts'].extend(volume_mounts)
    # Add the Kerberos token cache file location to the job container
    # so every instance of Kerberos picks it up even if it doesn't read
    # the configuration file.
    self.job['spec']['template']['spec']['containers'][0][
        'env'].append({'name': 'KRB5CCNAME',
                       'value': os.path.join(
                           current_app.config['KRB5_TOKEN_CACHE_LOCATION'],
                           current_app.config['KRB5_TOKEN_CACHE_FILENAME']
                           .format(self.kubernetes_uid)
                       )})
    self.job['spec']['template']['spec']['initContainers'].append(
        krb5_container)
def create_goofys_daemonset_object(name: str, user_id: str, envs: Sequence):
    envs = [] if not envs else envs
    container = client.V1Container(
        name=name,
        image="quay.io/bcdev/cate-s3-daemon:0.1",
        command=["/usr/local/bin/goofys",
                 "-o", "allow_other",
                 "--uid", "1000",
                 "--gid", "1000",
                 "-f",
                 "--region", "eu-central-1",
                 "eurodatacube:" + user_id,
                 "/var/s3"],
        env=envs,
        image_pull_policy="Always",
        volume_mounts=[
            {
                "mountPath": "/dev/fuse",
                "name": "devfuse-" + user_id,
            },
            {
                # "var/s3:shared" is docker-run syntax, not a valid mountPath;
                # shared propagation is expressed via mountPropagation.
                "mountPath": "/var/s3",
                "mountPropagation": "Bidirectional",
                "name": "mnt-goofys-" + user_id,
            },
        ],
        security_context=client.V1SecurityContext(
            privileged=True,
            capabilities={'add': ["SYS_ADMIN"]})
    )
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={"app": name,
                                             "purpose": "cate-webapi"}),
        spec=client.V1PodSpec(
            containers=[container],
            volumes=[
                {
                    'name': 'devfuse-' + user_id,
                    'hostPath': {'path': '/dev/fuse'}
                },
                {
                    'name': 'mnt-goofys-' + user_id,
                    'hostPath': {'path': '/var/s3'}
                }],
            security_context=client.V1PodSecurityContext(fs_group=1000)
        )
    )
    # A DaemonSet takes a V1DaemonSetSpec (which has no replicas field),
    # not a V1DeploymentSpec.
    spec = client.V1DaemonSetSpec(
        template=template,
        selector={'matchLabels': {'app': name}}
    )
    daemon_set = client.V1DaemonSet(
        metadata=client.V1ObjectMeta(name=name,
                                     labels={"app": name,
                                             "purpose": "cate-webapi"}),
        spec=spec,
    )
    return daemon_set
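# Hedged usage sketch: submit the DaemonSet through the apps/v1 API. The
# namespace, object name and user_id are illustrative assumptions.
from kubernetes import client

daemon_set = create_goofys_daemonset_object("goofys-jdoe", user_id="jdoe", envs=[])
client.AppsV1Api().create_namespaced_daemon_set(namespace="cate", body=daemon_set)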
def create_deployment_object(name: str,
                             user_id: str,
                             container_name: str,
                             image: str,
                             container_port: int,
                             command: Optional[Union[str, Sequence[str]]] = None,
                             envs: Optional[Sequence] = None,
                             volume_mounts: Optional[Sequence] = None,
                             volumes: Optional[Sequence] = None,
                             init_containers: Optional[Sequence] = None):
    # Configure the pod template container
    envs = [] if not envs else envs
    container = client.V1Container(
        name=container_name,
        image=image,
        command=command,
        env=envs,
        image_pull_policy="Always",
        ports=[client.V1ContainerPort(container_port=container_port)],
        volume_mounts=volume_mounts,
        security_context=client.V1SecurityContext(privileged=True)
    )
    # Create and configure a spec section
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={"app": container_name,
                                             "purpose": "cate-webapi"}),
        spec=client.V1PodSpec(
            containers=[container],
            volumes=volumes,
            init_containers=init_containers,
            security_context=client.V1PodSecurityContext(fs_group=1000)
        )
    )
    # Create the specification of the deployment
    spec = client.V1DeploymentSpec(
        replicas=1,
        template=template,
        selector={'matchLabels': {'app': container_name}}
    )
    # Instantiate the deployment object
    deployment = client.V1Deployment(
        api_version="apps/v1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name=name),
        spec=spec)
    return deployment
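# Hedged usage sketch; every argument value below is an assumption for
# illustration, not taken from the original code.
from kubernetes import client

deployment = create_deployment_object(
    name="cate-jdoe",
    user_id="jdoe",
    container_name="cate-webapi",
    image="quay.io/bcdev/cate:latest",  # hypothetical image tag
    container_port=4000,
)
client.AppsV1Api().create_namespaced_deployment(namespace="cate", body=deployment)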
def _build_pod_spec(self, img, node):
    with start_action(action_type="_build_pod_spec"):
        spec = client.V1PodSpec(
            containers=[
                client.V1Container(
                    command=self.command,
                    image=img,
                    image_pull_policy="Always",
                    name=self._podname_from_image(img),
                    # Container-level contexts use V1SecurityContext;
                    # V1PodSecurityContext belongs on the pod spec.
                    security_context=client.V1SecurityContext(
                        run_as_user=self.args.uid)
                )
            ],
            restart_policy="Never",
            node_name=node
        )
        return spec
def _deployment(self):
    return client.V1Deployment(
        api_version='apps/v1',
        kind='Deployment',
        metadata=metadata(name=self.name, namespace=self.namespace),
        spec=client.V1DeploymentSpec(
            strategy=client.V1DeploymentStrategy(
                type='Recreate'
            ),
            selector=client.V1LabelSelector(
                match_labels=self.labels
            ),
            template=client.V1PodTemplateSpec(
                metadata=metadata(
                    labels=self.labels,
                    annotations=dict({
                        'iam.amazonaws.com/role': self.role_arn}),
                ),
                spec=client.V1PodSpec(
                    service_account_name=self.name,
                    security_context=client.V1PodSecurityContext(
                        fs_group=65534,
                    ),
                    # ``containers`` expects a list, not a bare V1Container.
                    containers=[client.V1Container(
                        name=self.name,
                        image=self.image,
                        args=[
                            '--source=service',
                            '--source=ingress',
                            f'--domain-filter={self.dns_domain}',
                            '--provider=aws',
                            '--registry=txt',
                            '--txt-owner-id=hostedzone-identifier',
                        ]
                    )]
                )
            )
        )
    )
def get_pod_template(name, c_labels, c_volume_mounts, c_image, c_resources,
                     c_command, volumes):
    annotations = {"sidecar.istio.io/inject": "false"}
    tmp_metadata = client.V1ObjectMeta(annotations=annotations, labels=c_labels)
    containers = list()
    container = get_job_container(c_volume_mounts=c_volume_mounts,
                                  c_image=c_image,
                                  c_resources=c_resources,
                                  name=name,
                                  c_command=c_command)
    containers.append(container)
    volume_list = get_job_volumes(volumes)
    security_context = client.V1PodSecurityContext(fs_group=100, run_as_user=0)
    pod_spec = client.V1PodSpec(containers=containers,
                                restart_policy="Never",
                                security_context=security_context,
                                volumes=volume_list)
    return client.V1PodTemplateSpec(metadata=tmp_metadata, spec=pod_spec)
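# Hedged sketch: the returned pod template drops straight into a batch/v1 Job.
# All argument values here are illustrative assumptions; get_job_container and
# get_job_volumes come from the surrounding project.
from kubernetes import client

job = client.V1Job(
    api_version="batch/v1",
    kind="Job",
    metadata=client.V1ObjectMeta(name="example-job"),
    spec=client.V1JobSpec(
        template=get_pod_template(
            name="example-job",
            c_labels={"app": "example"},
            c_volume_mounts=[],
            c_image="busybox:1.36",
            c_resources=None,
            c_command=["echo", "hello"],
            volumes=[],
        )
    ),
)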
def userpod(pod_type, username, eppn, uid, ugid, groupname, ggid, annotations={}):
    """Starts a user pod

    Parameters
    ----------
    pod_type: str
        The workload type of the pod to launch, e.g. "theia-python"
    username: str
        The short username for the user, e.g. "mst3k"
    eppn: str
        The eppn / long username, e.g. "*****@*****.**"
    uid: int
        The numeric user ID of the user that the pod will run as
    ugid: int
        The numeric GID of the user's primary group
    groupname: str
        Opaque group name for project mount
    ggid: int
        The numeric group ID the pod will run as
    annotations: dict
        Any additional annotations for the ingress

    Returns
    -------
    pod_dns: str
        The partly-random domain name of the pod; can be used to check the
        pod's status, and determines the URL: https://<pod_dns>
    """
    cfg = get_config()
    v1 = client.CoreV1Api()
    v1beta = client.ExtensionsV1beta1Api()
    namespace = cfg["NAMESPACE"]
    type_maps = v1.list_namespaced_config_map(namespace,
                                              label_selector="class=userpod")
    ntypes = len(type_maps.items)
    podcfg = None
    for i in range(ntypes):
        if type_maps.items[i].metadata.name == pod_type:
            podcfg = type_maps.items[i]
            break
    if not podcfg:
        raise Exception("Unsupported type")
    poddata = podcfg.data
    passwd = poddata["passwd"]
    passwd = passwd.replace("<UID>", str(uid))
    passwd = passwd.replace("<UGID>", str(ugid))
    group = poddata["group"]
    group = group.replace("<UGID>", str(ugid))
    group = group.replace("<PGID>", str(ggid))
    pod_port = poddata["port"]
    envoycfg = cfg["ENVOY_TEMPLATE"]
    envoyext = cfg["ENVOY_EXTERNAL"]
    envoyadm = cfg["ENVOY_ADMIN"]
    envoycfg = envoycfg.replace("<SERVICEPORT>", pod_port)
    envoycfg = envoycfg.replace("<ENVOYADMIN>", envoyadm)
    envoycfg = envoycfg.replace("<ENVOYEXTERNAL>", envoyext)
    envoycfg = envoycfg.replace("<SHORTUSER>", username)
    envoycfg = envoycfg.replace("<LONGUSER>", eppn)
    podmap = {
        "passwd": passwd,
        "group": group,
        "envoy": envoycfg,
    }
    username_label = gen_user_label(username)
    dashed_username = username.replace("@", "-")
    dashed_username = dashed_username.replace(".", "-")
    cfgmap = client.V1ConfigMap(
        api_version="v1",
        kind="ConfigMap",
        metadata=client.V1ObjectMeta(
            generate_name="%s-%s-" % (pod_type, dashed_username),
            labels={
                "username-label": username_label,
            },
        ),
        data=podmap,
    )
    created_map = v1.create_namespaced_config_map(namespace, cfgmap)
    pod_name = created_map.metadata.name
    patch_labels = {
        "user-pod": pod_name,
    }
    label = {
        "metadata": {
            "labels": patch_labels,
        },
    }
    v1.patch_namespaced_config_map(pod_name, namespace=namespace, body=label)
    resource_labels = {
        "username-label": username_label,
        "user-pod": pod_name,
    }
    pod_volumes = [
        client.V1Volume(name="cfgfiles",
                        config_map=client.V1ConfigMapVolumeSource(
                            name=pod_name,
                        )),
    ]
    userpod_mounts = [
        client.V1VolumeMount(
            name="cfgfiles",
            mount_path="/etc/passwd",
            sub_path="passwd",
        ),
        client.V1VolumeMount(
            name="cfgfiles",
            mount_path="/etc/group",
            sub_path="group",
        ),
    ]
    if "HOME_PREFIX" in cfg:
        # NOTE / TODO: This will change eventually
        pod_volumes.append(
            client.V1Volume(name="home",
                            host_path=client.V1HostPathVolumeSource(
                                path="%s/%s" % (cfg["HOME_PREFIX"], username),
                                type="Directory")))
        userpod_mounts.append(
            client.V1VolumeMount(name="home", mount_path="/home/user"))
    if "PROJECT_PREFIX" in cfg:
        pod_volumes.append(
            client.V1Volume(
                name="project",
                host_path=client.V1HostPathVolumeSource(
                    path="%s/%s" % (cfg["PROJECT_PREFIX"], groupname),
                    type="Directory")))
        userpod_mounts.append(
            client.V1VolumeMount(name="project", mount_path="/home/project"))
    registry = cfg["REGISTRY"]
    reg_org = cfg["REGISTRY_ORG"]
    envoy = cfg["ENVOY_CONTAINER"]

    supplemental = []
    if ugid != ggid:
        supplemental = [ggid]
    pod = client.V1Pod(
        api_version="v1",
        kind="Pod",
        metadata=client.V1ObjectMeta(
            name=pod_name,
            labels=resource_labels,
        ),
        spec=client.V1PodSpec(
            volumes=pod_volumes,
            restart_policy="OnFailure",
            security_context=client.V1PodSecurityContext(
                supplemental_groups=supplemental),
            containers=[
                client.V1Container(
                    name=pod_type,
                    image="%s/%s/%s:latest" % (registry, reg_org, pod_type),
                    security_context=client.V1SecurityContext(
                        run_as_user=uid,
                        run_as_group=ugid,
                    ),
                    volume_mounts=userpod_mounts,
                ),
                client.V1Container(
                    name="envoy",
                    image=envoy,
                    volume_mounts=[
                        client.V1VolumeMount(
                            name="cfgfiles",
                            mount_path="/etc/envoy/envoy.yaml",
                            sub_path="envoy",
                        ),
                    ],
                ),
            ],
        ))
    v1.create_namespaced_pod(namespace=namespace, body=pod)
    service = client.V1Service(
        api_version="v1",
        kind="Service",
        metadata=client.V1ObjectMeta(
            name=pod_name,
            labels=resource_labels,
            namespace=namespace,
        ),
        spec=client.V1ServiceSpec(
            selector=resource_labels,
            ports=[
                client.V1ServicePort(
                    port=80,
                    protocol="TCP",
                    target_port=int(envoyext),
                )
            ]))
    v1.create_namespaced_service(namespace, body=service)
    env_service = client.V1Service(
        api_version="v1",
        kind="Service",
        metadata=client.V1ObjectMeta(
            name="%s-envoy" % pod_name,
            labels=resource_labels,
            namespace=namespace,
        ),
        spec=client.V1ServiceSpec(
            selector=resource_labels,
            ports=[
                client.V1ServicePort(
                    port=9000,
                    protocol="TCP",
                    target_port=int(envoyadm),
                )
            ]))
    v1.create_namespaced_service(namespace, body=env_service)
    pod_dom = cfg["POD_DOMAIN"]
    pod_dns = "%s.%s" % (pod_name, pod_dom)
    annotations["kubernetes.io/ingress.class"] = "nginx"
    ingress = client.ExtensionsV1beta1Ingress(
        metadata=client.V1ObjectMeta(
            name=pod_name,
            labels=resource_labels,
            namespace=namespace,
            annotations=annotations,
        ),
        spec=client.ExtensionsV1beta1IngressSpec(rules=[
            client.ExtensionsV1beta1IngressRule(
                host=pod_dns,
                http=client.ExtensionsV1beta1HTTPIngressRuleValue(paths=[
                    client.ExtensionsV1beta1HTTPIngressPath(
                        path="/",
                        backend=client.ExtensionsV1beta1IngressBackend(
                            service_name=pod_name,
                            service_port=80,
                        ),
                    )
                ]),
            )
        ]),
    )
    v1beta.create_namespaced_ingress(namespace, body=ingress)
    return pod_dns
def get_security_context():
    return client.V1PodSecurityContext(fs_group=POLYAXON_FS_GROUP,
                                       run_as_user=POLYAXON_USER,
                                       run_as_group=POLYAXON_USER)
def generate_pod():
    metadata = client.V1ObjectMeta(
        name="platform-app-958795556-2nqgj",
        namespace="production",
        generate_name="platform-app-958795556-",
        labels={
            "app": "platform",
            "chart": "platform",
            "component": "app",
            "heritage": "Helm",
            "pod-template-hash": "958795556",
            "release": "platform-production",
            "version": "1.0.3",
        },
        owner_references=[
            client.V1OwnerReference(
                api_version="apps/v1",
                kind="ReplicaSet",
                name="platform-app-958795556",
                uid="35ba938b-681d-11eb-a74a-16e1a04d726b",
                controller=True,
                block_owner_deletion=True,
            )
        ],
    )
    container = client.V1Container(
        name="app",
        image="platform.azurecr.io/app:master",
        image_pull_policy="Always",
        termination_message_policy="File",
        termination_message_path="/dev/termination-log",
        env=[],
        resources=client.V1ResourceRequirements(
            limits={"cpu": "1200m", "memory": "1Gi"},
            requests={"cpu": "1", "memory": "768Mi"},
        ),
        ports=[client.V1ContainerPort(container_port=3000, protocol="TCP")],
        volume_mounts=[
            client.V1VolumeMount(
                name="default-token-2cg25",
                read_only=True,
                mount_path="/var/run/secrets/kubernetes.io/serviceaccount",
            )
        ],
        liveness_probe=client.V1Probe(
            initial_delay_seconds=10,
            timeout_seconds=5,
            period_seconds=10,
            success_threshold=1,
            failure_threshold=6,
            http_get=client.V1HTTPGetAction(path="/health/liveness",
                                            port=3000,
                                            scheme="HTTP"),
        ),
        readiness_probe=client.V1Probe(
            initial_delay_seconds=10,
            timeout_seconds=5,
            period_seconds=10,
            success_threshold=2,
            failure_threshold=6,
            http_get=client.V1HTTPGetAction(path="/health/readiness",
                                            port=3000,
                                            scheme="HTTP"),
        ),
    )
    spec = client.V1PodSpec(
        containers=[container],
        volumes=[
            client.V1Volume(
                name="default-token-2cg25",
                secret=client.V1SecretVolumeSource(
                    secret_name="default-token-2cg25", default_mode=420),
            )
        ],
        restart_policy="Always",
        termination_grace_period_seconds=30,
        dns_policy="ClusterFirst",
        service_account_name="default",
        service_account="default",
        node_name="aks-agentpool-26722002-vmss00039t",
        security_context=client.V1PodSecurityContext(run_as_user=1000,
                                                     fs_group=1000),
        scheduler_name="default-scheduler",
        tolerations=[
            client.V1Toleration(
                key="node.kubernetes.io/not-ready",
                operator="Exists",
                effect="NoExecute",
                toleration_seconds=300,
            ),
            client.V1Toleration(
                key="node.kubernetes.io/unreachable",
                operator="Exists",
                effect="NoExecute",
                toleration_seconds=300,
            ),
        ],
        priority=0,
        enable_service_links=True,
    )
    return client.V1Pod(metadata=metadata, spec=spec)
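# Hedged sketch: sanitize_for_serialization renders the fixture as the plain
# camelCase dict the API server would store, which is handy for asserting on
# fields such as the pod-level security context in tests.
from kubernetes.client import ApiClient

pod_manifest = ApiClient().sanitize_for_serialization(generate_pod())
assert pod_manifest["spec"]["securityContext"] == {"runAsUser": 1000, "fsGroup": 1000}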
def single_test(namespace, image, args, replicas, timeout, keep, cpu_limit,
                memory_limit):
    deployment_name = f'scheduletest-{time.time():.0f}'
    log.info(
        f'Deploying {deployment_name}. Will timeout after {timeout}s. Will keep for {keep}s.'
    )
    labels = {
        'test': deployment_name,
        'app': 'scheduletester',
    }
    match_expressions = []
    for label_key, label_value in labels.items():
        match_expressions.append(
            client.V1LabelSelectorRequirement(key=label_key,
                                              operator='In',
                                              values=[label_value]))
    label_selector = ','.join(f'{k}={v}' for k, v in labels.items())
    resources_limits = {
        'cpu': cpu_limit,
        'memory': memory_limit,
    }
    # new deployment
    deployment = client.V1Deployment(
        metadata=client.V1ObjectMeta(
            name=deployment_name,
            namespace=namespace,
            labels=labels,
        ),
        spec=client.V1DeploymentSpec(
            replicas=replicas,
            selector=client.V1LabelSelector(match_labels=labels),
            template=client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(name=deployment_name,
                                             labels=labels),
                spec=client.V1PodSpec(
                    service_account='default',
                    security_context=client.V1PodSecurityContext(
                        fs_group=1000,
                        run_as_user=1000,
                    ),
                    affinity=client.V1Affinity(
                        pod_anti_affinity=client.V1PodAntiAffinity(
                            required_during_scheduling_ignored_during_execution=[
                                client.V1PodAffinityTerm(
                                    label_selector=client.V1LabelSelector(
                                        match_expressions=match_expressions),
                                    topology_key='kubernetes.io/hostname')
                            ])),
                    termination_grace_period_seconds=1,
                    containers=[
                        client.V1Container(
                            name='canary',
                            image=image,
                            args=args,
                            resources=client.V1ResourceRequirements(
                                limits=resources_limits,
                                requests=resources_limits,
                            ),
                        )
                    ]))))
    start_time = time.time()
    response = client.AppsV1Api().create_namespaced_deployment(namespace,
                                                               body=deployment)
    duration = 0
    ready_replicas = 0
    w = watch.Watch()
    for event in w.stream(client.AppsV1Api().list_namespaced_deployment,
                          namespace,
                          label_selector=label_selector,
                          timeout_seconds=timeout):
        log.debug(f'Received event: {event["type"]}')
        event_ready_replicas = event['object'].status.ready_replicas
        if event_ready_replicas:
            ready_replicas = event_ready_replicas
            log.debug(f'{ready_replicas} of {replicas} pods are available')
            if ready_replicas == replicas:
                duration = time.time() - start_time
                w.stop()
    if duration > 0:
        prom_time_to_deployment_ready.observe(duration)
        log.info(f'All {replicas} pods ready after {duration:.2f}s')
        time.sleep(keep)
    else:
        prom_deployment_timeouts.inc()
        log.warning(f'Timeout: only {ready_replicas} of {replicas} pods ready')
    # cleaning up
    log.info('Deleting the deployment')
    client.AppsV1Api().delete_namespaced_deployment(deployment_name, namespace)
def execute(self):
    """Execute a job in Kubernetes."""
    backend_job_id = str(uuid.uuid4())
    self.job = {
        'kind': 'Job',
        'apiVersion': 'batch/v1',
        'metadata': {
            'name': backend_job_id,
            'namespace': K8S_DEFAULT_NAMESPACE
        },
        'spec': {
            'backoffLimit': KubernetesJobManager.MAX_NUM_JOB_RESTARTS,
            'autoSelector': True,
            'template': {
                'metadata': {
                    'name': backend_job_id
                },
                'spec': {
                    'containers': [
                        {
                            'image': self.docker_img,
                            'command': self.cmd,
                            'name': 'job',
                            'env': [],
                            'volumeMounts': [],
                        }
                    ],
                    'initContainers': [],
                    'volumes': [],
                    'restartPolicy': 'Never'
                }
            }
        }
    }
    user_id = os.getenv('REANA_USER_ID')
    secrets_store = REANAUserSecretsStore(user_id)
    secret_env_vars = secrets_store.get_env_secrets_as_k8s_spec()
    self.job['spec']['template']['spec']['containers'][0]['env'].extend(
        secret_env_vars
    )
    self.job['spec']['template']['spec']['volumes'].append(
        secrets_store.get_file_secrets_volume_as_k8s_specs()
    )
    secrets_volume_mount = \
        secrets_store.get_secrets_volume_mount_as_k8s_spec()
    self.job['spec']['template']['spec']['containers'][0]['volumeMounts'] \
        .append(secrets_volume_mount)
    if self.env_vars:
        for var, value in self.env_vars.items():
            self.job['spec']['template']['spec'][
                'containers'][0]['env'].append({'name': var, 'value': value})
    self.add_hostpath_volumes()
    self.add_shared_volume()
    self.add_eos_volume()
    self.add_image_pull_secrets()
    if self.cvmfs_mounts != 'false':
        cvmfs_map = {}
        for cvmfs_mount_path in ast.literal_eval(self.cvmfs_mounts):
            if cvmfs_mount_path in CVMFS_REPOSITORIES:
                cvmfs_map[
                    CVMFS_REPOSITORIES[cvmfs_mount_path]] = cvmfs_mount_path
        for repository, mount_path in cvmfs_map.items():
            volume = get_k8s_cvmfs_volume(repository)
            self.job['spec']['template']['spec']['containers'][0][
                'volumeMounts'].append(
                    {'name': volume['name'],
                     'mountPath': '/cvmfs/{}'.format(mount_path),
                     'readOnly': volume['readOnly']}
            )
            self.job['spec']['template']['spec']['volumes'].append(volume)
    self.job['spec']['template']['spec']['securityContext'] = \
        client.V1PodSecurityContext(
            run_as_group=WORKFLOW_RUNTIME_USER_GID,
            run_as_user=self.kubernetes_uid)
    if self.kerberos:
        self._add_krb5_init_container(secrets_volume_mount)
    backend_job_id = self._submit()
    return backend_job_id
class K8sJobEngine():
    def __init__(self):
        self.log = get_logger(self.__class__.__name__)
        if kalytical_config.kalytical_endpoint is None:
            # This is the API endpoint we send back to the pod for a
            # callback/interaction during pipeline running. It may be behind a
            # load balancer/DNS - i.e. it can't communicate with localhost.
            raise ConfigException(
                "Config is missing parameter for kalytical API endpoint!")
        try:
            # Defaults to the service account assigned to the pod.
            config.load_incluster_config()
        except ConfigException:
            self.log.warn(
                f"Could not load kube configuration from pod! Attempting to configure client with local kubeconfig={config.KUBE_CONFIG_DEFAULT_LOCATION}")
            config.load_kube_config()
        self._k8s_core_client = client.CoreV1Api()
        self._running_job_list = None

    @staticmethod
    def _get_default_container_args() -> list:
        return kalytical_config.k8spodengine_default_container_args

    @staticmethod
    def _get_default_container_uri() -> str:
        return kalytical_config.default_pipeline_image_uri

    async def submit_job(self, header_model: PipelineHeaderModel,
                         exec_uuid: str, source_uuid: str = None,
                         retry_count: int = 0) -> RunningPipelineModel:
        self.log.info(
            f"Attempting to submit pod for pipeline_uuid={header_model.pipeline_uuid}")
        job_pod = self.marshall_k8s_pod(
            header_model=header_model, exec_uuid=exec_uuid,
            source_uuid=source_uuid, retry_count=retry_count)
        # TODO Handle cases where pod create fails - i.e. resource starvation
        pod_resp = self._k8s_core_client.create_namespaced_pod(
            namespace=kalytical_config.k8spodengine_k8s_namespace,
            body=job_pod)
        return self.unmarshall_pod(pod_obj=pod_resp)

    # Synchronous: it is called without ``await`` in submit_job above.
    def marshall_k8s_pod(self, header_model: PipelineHeaderModel,
                         exec_uuid: str, source_uuid: str = None,
                         retry_count: int = 0) -> client.V1Pod:
        common_job_name = '-'.join(
            [exec_uuid, header_model.pipeline_uuid, str(retry_count)])
        if 'pipeline_args' in header_model.engine_args.keys():
            pipeline_args = header_model.engine_args['pipeline_args']
        else:
            # The original called an undefined _get_default_engine_args();
            # the defined helper above is assumed to be the intended one.
            pipeline_args = self._get_default_container_args()
        if 'pipeline_command' in header_model.engine_args.keys():
            pipeline_command = header_model.engine_args['pipeline_command']
        else:
            pipeline_command = self._get_default_container_command()
        if 'pipeline_image' in header_model.engine_args.keys():
            pipeline_image = header_model.engine_args['pipeline_image']
        else:
            pipeline_image = self._get_default_container_uri()
        container_spec = client.V1Container(
            name=common_job_name,
            image=pipeline_image,
            args=pipeline_args,
            command=pipeline_command,
            env=[
                client.V1EnvVar(name="PIPELINE_UUID",
                                value=header_model.pipeline_uuid),
                client.V1EnvVar(name="SOURCE_UUID",
                                value=json.dumps(json.dumps(source_uuid))),
                client.V1EnvVar(name="EXEC_UUID", value=exec_uuid),
                client.V1EnvVar(name="RETRY_COUNT", value=str(retry_count)),
                client.V1EnvVar(name="MQ_CALLBACK_URL",
                                value=kalytical_config.mq_url),
                client.V1EnvVar(name="KALYTICAL_AUTH_SECRET",
                                value=kalytical_config.api_secret),
                client.V1EnvVar(name="KALYTICAL_API_ENDPOINT",
                                value=kalytical_config.api_endpoint)
            ],
            resources=client.V1ResourceRequirements(
                limits={'cpu': header_model.engine_args['cpu_count'],
                        'memory': header_model.engine_args['memory_gi']}))
        # The original left ``pipeline`` as a bare (undefined) name; a string
        # label value is assumed here.
        pod_spec = client.V1PodSpec(
            service_account_name=kalytical_config.k8spodengine_svc_account_name,
            node_selector={
                "kalytical.k8s.node/workload": "pipeline",
                "beta.kubernetes.io/instance-type":
                    header_model.engine_args['instance_type']},
            tolerations=[client.V1Toleration(
                key="node.kubernetes.io/pipeline",
                operator="Exists",
                effect='NoSchedule')],
            security_context=client.V1PodSecurityContext(fs_group=100),
            restart_policy="Never",
            containers=[container_spec])
        # TODO Tolerations and selectors might not work for a generic use case
        return client.V1Pod(
            spec=pod_spec,
            metadata=client.V1ObjectMeta(
                name=common_job_name,
                labels={"pod_source": "kalytical",
                        "exec_uuid": exec_uuid,
                        "pipeline_uuid": header_model.pipeline_uuid}))
def _create_deployment_object(self):
    """Creates the deployment object for the grader service using environment variables

    Returns:
        V1Deployment: a valid kubernetes deployment object
    """
    # Configure the pod template container
    # Volumes to mount as subPaths of PV
    sub_path_grader_home = str(self.course_dir.parent).strip('/')
    sub_path_exchange = str(
        self.exchange_dir.relative_to(EXCHANGE_MNT_ROOT))
    # define the container to launch
    container = client.V1Container(
        name='grader-notebook',
        image=GRADER_IMAGE_NAME,
        command=[
            'start-notebook.sh', f'--group=formgrade-{self.course_id}'
        ],
        ports=[client.V1ContainerPort(container_port=8888)],
        working_dir=f'/home/{self.grader_name}',
        resources=client.V1ResourceRequirements(
            requests={
                "cpu": "100m",
                "memory": "200Mi"
            },
            limits={
                "cpu": "500m",
                "memory": "1G"
            }),
        security_context=client.V1SecurityContext(
            allow_privilege_escalation=False),
        env=[
            client.V1EnvVar(name='JUPYTERHUB_SERVICE_NAME',
                            value=self.course_id),
            client.V1EnvVar(name='JUPYTERHUB_API_TOKEN',
                            value=self.grader_token),
            # we're using the K8s Service name 'hub' (defined in the jhub helm chart)
            # to connect from our grader-notebooks
            client.V1EnvVar(name='JUPYTERHUB_API_URL',
                            value='http://hub:8081/hub/api'),
            client.V1EnvVar(name='JUPYTERHUB_BASE_URL', value='/'),
            client.V1EnvVar(name='JUPYTERHUB_SERVICE_PREFIX',
                            value=f'/services/{self.course_id}/'),
            client.V1EnvVar(name='JUPYTERHUB_CLIENT_ID',
                            value=f'service-{self.course_id}'),
            client.V1EnvVar(name='JUPYTERHUB_USER', value=self.grader_name),
            client.V1EnvVar(name='NB_UID', value=str(NB_UID)),
            client.V1EnvVar(name='NB_GID', value=str(NB_GID)),
            client.V1EnvVar(name='NB_USER', value=self.grader_name),
        ],
        volume_mounts=[
            client.V1VolumeMount(mount_path=f'/home/{self.grader_name}',
                                 name=GRADER_PVC,
                                 sub_path=sub_path_grader_home),
            client.V1VolumeMount(mount_path='/srv/nbgrader/exchange',
                                 name=GRADER_EXCHANGE_SHARED_PVC,
                                 sub_path=sub_path_exchange),
        ],
    )
    # Create and configure a spec section
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels={
            'component': self.grader_name,
            'app': 'illumidesk'
        }),
        spec=client.V1PodSpec(
            containers=[container],
            security_context=client.V1PodSecurityContext(run_as_user=0),
            volumes=[
                client.V1Volume(
                    name=GRADER_PVC,
                    persistent_volume_claim=client.
                    V1PersistentVolumeClaimVolumeSource(
                        claim_name=GRADER_PVC),
                ),
                client.V1Volume(
                    name=GRADER_EXCHANGE_SHARED_PVC,
                    persistent_volume_claim=client.
                    V1PersistentVolumeClaimVolumeSource(
                        claim_name=GRADER_EXCHANGE_SHARED_PVC),
                ),
            ],
        ),
    )
    # Create the specification of deployment
    spec = client.V1DeploymentSpec(
        replicas=1,
        template=template,
        selector={'matchLabels': {
            'component': self.grader_name
        }})
    # Instantiate the deployment object
    deployment = client.V1Deployment(
        api_version="apps/v1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name=self.grader_name),
        spec=spec)
    return deployment
def create_pod(value_pod, pvc_name, pod_name, created_objects,
               image_name="nginx:1.19.0"):
    """
    creates pod

    Args:
        param1: value_pod - values required for creation of pod (mount path,
                read-only flag, optional sub paths, and the uid/gid used for
                the pod securityContext runAsUser / runAsGroup)
        param2: pvc_name - name of pvc the pod is associated with
        param3: pod_name - name of pod to be created
        param4: created_objects - dict tracking created objects for cleanup
        param5: image_name - name of the pod image (Default:"nginx:1.19.0")

    Returns:
        None

    Raises:
        Raises an exception on kubernetes client api failure and asserts
    """
    if value_pod["read_only"] == "True":
        value_pod["read_only"] = True
    elif value_pod["read_only"] == "False":
        value_pod["read_only"] = False
    api_instance = client.CoreV1Api()
    pod_metadata = client.V1ObjectMeta(name=pod_name, labels={"app": "nginx"})
    pod_persistent_volume_claim = client.V1PersistentVolumeClaimVolumeSource(
        claim_name=pvc_name, read_only=value_pod["read_only"])
    pod_volumes = client.V1Volume(
        name="mypvc", persistent_volume_claim=pod_persistent_volume_claim)
    pod_ports = client.V1ContainerPort(container_port=80)
    if "sub_path" not in value_pod:
        pod_volume_mounts = client.V1VolumeMount(
            name="mypvc", mount_path=value_pod["mount_path"])
        pod_containers = client.V1Container(name="web-server",
                                            image=image_name,
                                            volume_mounts=[pod_volume_mounts],
                                            ports=[pod_ports])
    else:
        list_pod_volume_mount = []
        for iter_num, single_sub_path in enumerate(value_pod["sub_path"]):
            final_mount_path = value_pod[
                "mount_path"] if iter_num == 0 else value_pod[
                "mount_path"] + str(iter_num)
            list_pod_volume_mount.append(
                client.V1VolumeMount(
                    name="mypvc",
                    mount_path=final_mount_path,
                    sub_path=single_sub_path,
                    read_only=value_pod["volumemount_readonly"][iter_num]))
        command = ["/bin/sh", "-c", "--"]
        args = ["while true; do sleep 30; done;"]
        pod_containers = client.V1Container(
            name="web-server",
            image=image_name,
            volume_mounts=list_pod_volume_mount,
            ports=[pod_ports],
            command=command,
            args=args)
    if "gid" in value_pod and "uid" in value_pod:
        pod_security_context = client.V1PodSecurityContext(
            run_as_group=int(value_pod["gid"]),
            run_as_user=int(value_pod["uid"]))
        pod_spec = client.V1PodSpec(containers=[pod_containers],
                                    volumes=[pod_volumes],
                                    node_selector=nodeselector,
                                    security_context=pod_security_context)
    else:
        pod_spec = client.V1PodSpec(containers=[pod_containers],
                                    volumes=[pod_volumes],
                                    node_selector=nodeselector)
    pod_body = client.V1Pod(api_version="v1",
                            kind="Pod",
                            metadata=pod_metadata,
                            spec=pod_spec)
    try:
        LOGGER.info(
            f'POD Create : creating pod {pod_name} using {pvc_name} with {image_name} image with parameters {value_pod}'
        )
        api_response = api_instance.create_namespaced_pod(
            namespace=namespace_value, body=pod_body, pretty=True)
        LOGGER.debug(str(api_response))
        if pod_name[0:12] == "snap-end-pod":
            created_objects["restore_pod"].append(pod_name)
        elif pod_name[0:5] == "clone":
            created_objects["clone_pod"].append(pod_name)
        else:
            created_objects["pod"].append(pod_name)
    except ApiException as e:
        LOGGER.error(
            f"Exception when calling CoreV1Api->create_namespaced_pod: {e}")
        cleanup.clean_with_created_objects(created_objects)
        assert False
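# Hedged sketch of the ``value_pod`` shape create_pod expects (keys inferred
# from the function body; the concrete values are illustrative assumptions).
value_pod = {
    "read_only": "False",
    "mount_path": "/usr/share/nginx/html/scale",
    "uid": "1000",  # becomes runAsUser in the pod securityContext
    "gid": "1000",  # becomes runAsGroup
}
created_objects = {"pod": [], "restore_pod": [], "clone_pod": []}
create_pod(value_pod, "my-pvc", "my-pod", created_objects)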
def add(ip, game_id, params):
    game = get_game_by_id(game_id)
    game.validate_params(params)
    uid = uuid.uuid4().hex[:12]
    name = "gaas-{}".format(uid)
    labels = {
        "app": "gaas",
        "game": game_id,
        "server": uid,
        "creator": ip,
    }
    metadata = client.V1ObjectMeta(
        labels=labels,
        name=name,
    )
    ip_ext = alloc_ip()
    extra_env = [client.V1EnvVar(
        name="IP_ALLOC",
        value=ip_ext
    ), client.V1EnvVar(
        name="IP_CREATOR",
        value=ip
    )]
    containers = game.make_deployment(params)
    generic_ports = []
    # TODO(bluecmd): Hack to work around that not all
    # ports are routed to the VIP by default. This allows
    # outgoing connections from inside the pod on the VIP.
    for p in range(50000, 50016):
        generic_ports.append(client.V1ServicePort(
            name="internal-tcp-" + str(p),
            port=p,
            target_port=p,
            protocol="TCP"))
        generic_ports.append(client.V1ServicePort(
            name="internal-udp-" + str(p),
            port=p,
            target_port=p,
            protocol="UDP"))
    for container in containers:
        if container.env:
            container.env.extend(extra_env)
        else:
            container.env = extra_env
        if not container.resources:
            container.resources = client.V1ResourceRequirements(
                limits={
                    "cpu": "4",
                    "memory": "32G"
                },
                requests={
                    "cpu": "2",
                    "memory": "16G"
                }
            )
    deployment = client.V1Deployment(
        spec=client.V1DeploymentSpec(
            replicas=1,
            # apps/v1 uses V1DeploymentStrategy / V1RollingUpdateDeployment;
            # the AppsV1beta1* classes belong to the retired apps/v1beta1 API.
            strategy=client.V1DeploymentStrategy(
                rolling_update=client.V1RollingUpdateDeployment(
                    max_surge=0,
                    max_unavailable=1
                )
            ),
            selector=client.V1LabelSelector(
                match_labels=labels,
            ),
            template=client.V1PodTemplateSpec(
                spec=client.V1PodSpec(
                    containers=containers,
                    termination_grace_period_seconds=0,
                    # TODO(bluecmd): Hack to work around that not all
                    # ports are routed to the VIP by default. This allows
                    # outgoing connections from inside the pod on the VIP.
                    security_context=client.V1PodSecurityContext(
                        sysctls=[client.V1Sysctl(
                            name='net.ipv4.ip_local_port_range',
                            value='50000 50015')]),
                    affinity=client.V1Affinity(
                        node_affinity=client.V1NodeAffinity(
                            required_during_scheduling_ignored_during_execution=client.V1NodeSelector(
                                node_selector_terms=[
                                    client.V1NodeSelectorTerm(
                                        match_expressions=[
                                            client.V1NodeSelectorRequirement(
                                                key="kubernetes.io/role",
                                                operator="NotIn",
                                                values=["shared"]
                                            )
                                        ]
                                    )
                                ]
                            )
                        )
                    )
                )
            )
        )
    )
    service = client.V1Service(
        spec=client.V1ServiceSpec(
            type="ClusterIP",
            selector=labels,
            ports=game.make_service(params) + generic_ports,
            external_i_ps=[ip_ext],
        )
    )
    deployment.metadata = metadata
    deployment.spec.template.metadata = metadata
    service.metadata = metadata
    service.metadata.annotations = {"kube-router.io/service.dsr": "tunnel"}
    client.AppsV1Api().create_namespaced_deployment(
        namespace=NAMESPACE,
        body=deployment,
    )
    service_resp = client.CoreV1Api().create_namespaced_service(
        namespace=NAMESPACE,
        body=service,
    )
    return {"uid": uid, "ip": ip}
def execute(self):
    """Execute a job in Kubernetes."""
    backend_job_id = build_unique_component_name("run-job")
    self.job = {
        "kind": "Job",
        "apiVersion": "batch/v1",
        "metadata": {
            "name": backend_job_id,
            "namespace": REANA_RUNTIME_KUBERNETES_NAMESPACE,
        },
        "spec": {
            "automountServiceAccountToken": False,
            "backoffLimit": KubernetesJobManager.MAX_NUM_JOB_RESTARTS,
            "autoSelector": True,
            "template": {
                "metadata": {
                    "name": backend_job_id,
                    "labels": {"reana-run-job-workflow-uuid": self.workflow_uuid},
                },
                "spec": {
                    "containers": [
                        {
                            "image": self.docker_img,
                            "command": ["bash", "-c"],
                            "args": [self.cmd],
                            "name": "job",
                            "env": [],
                            "volumeMounts": [],
                        }
                    ],
                    "initContainers": [],
                    "volumes": [],
                    "restartPolicy": "Never",
                    "enableServiceLinks": False,
                },
            },
        },
    }
    user_id = os.getenv("REANA_USER_ID")
    secrets_store = REANAUserSecretsStore(user_id)
    secret_env_vars = secrets_store.get_env_secrets_as_k8s_spec()
    job_spec = self.job["spec"]["template"]["spec"]
    job_spec["containers"][0]["env"].extend(secret_env_vars)
    job_spec["volumes"].append(secrets_store.get_file_secrets_volume_as_k8s_specs())
    secrets_volume_mount = secrets_store.get_secrets_volume_mount_as_k8s_spec()
    job_spec["containers"][0]["volumeMounts"].append(secrets_volume_mount)
    if self.env_vars:
        for var, value in self.env_vars.items():
            job_spec["containers"][0]["env"].append({"name": var, "value": value})
    self.add_memory_limit(job_spec)
    self.add_hostpath_volumes()
    self.add_workspace_volume()
    self.add_shared_volume()
    self.add_eos_volume()
    self.add_image_pull_secrets()
    self.add_kubernetes_job_timeout()
    if self.cvmfs_mounts != "false":
        cvmfs_map = {}
        for cvmfs_mount_path in ast.literal_eval(self.cvmfs_mounts):
            if cvmfs_mount_path in CVMFS_REPOSITORIES:
                cvmfs_map[CVMFS_REPOSITORIES[cvmfs_mount_path]] = cvmfs_mount_path
        for repository, mount_path in cvmfs_map.items():
            volume = get_k8s_cvmfs_volume(repository)
            job_spec["containers"][0]["volumeMounts"].append(
                {
                    "name": volume["name"],
                    "mountPath": "/cvmfs/{}".format(mount_path),
                    "readOnly": volume["readOnly"],
                }
            )
            job_spec["volumes"].append(volume)
    self.job["spec"]["template"]["spec"][
        "securityContext"
    ] = client.V1PodSecurityContext(
        run_as_group=WORKFLOW_RUNTIME_USER_GID, run_as_user=self.kubernetes_uid
    )
    if self.kerberos:
        self._add_krb5_init_container(secrets_store)
    if self.voms_proxy:
        self._add_voms_proxy_init_container(secrets_volume_mount, secret_env_vars)
    if REANA_RUNTIME_JOBS_KUBERNETES_NODE_LABEL:
        self.job["spec"]["template"]["spec"][
            "nodeSelector"
        ] = REANA_RUNTIME_JOBS_KUBERNETES_NODE_LABEL
    backend_job_id = self._submit()
    return backend_job_id
def _create_deployment_object(self):
    """Creates the deployment object for the grader service using environment variables

    Returns:
        V1Deployment: a valid kubernetes deployment object
    """
    # Configure the pod template container
    # Volumes to mount as subPaths of PV
    sub_path_grader_home = str(self.course_dir.parent).strip("/")
    sub_path_exchange = str(self.exchange_dir.relative_to(EXCHANGE_MNT_ROOT))
    # define the container to launch
    container = client.V1Container(
        name="grader-notebook",
        image=GRADER_IMAGE_NAME,
        image_pull_policy=GRADER_IMAGE_PULL_POLICY,
        command=["start-notebook.sh", f"--group=formgrade-{self.course_id}"],
        ports=[client.V1ContainerPort(container_port=8888)],
        working_dir=f"/home/{self.grader_name}",
        resources=client.V1ResourceRequirements(
            requests={
                "cpu": GRADER_REQUESTS_CPU,
                "memory": GRADER_REQUESTS_MEM,
            },
            limits={
                "cpu": GRADER_LIMITS_CPU,
                "memory": GRADER_LIMITS_MEM,
            },
        ),
        security_context=client.V1SecurityContext(allow_privilege_escalation=False),
        env=[
            client.V1EnvVar(name="JUPYTERHUB_SERVICE_NAME", value=self.course_id),
            client.V1EnvVar(
                name="JUPYTERHUB_SERVICE_URL",
                value=f"http://{self.course_id}.{NAMESPACE}.svc.cluster.local:8888",
            ),
            client.V1EnvVar(name="JUPYTERHUB_API_TOKEN", value=self.grader_token),
            # we're using the K8s Service name 'hub' (defined in the jhub helm chart)
            # to connect from our grader-notebooks
            client.V1EnvVar(name="JUPYTERHUB_API_URL", value=JUPYTERHUB_API_URL),
            client.V1EnvVar(name="JUPYTERHUB_BASE_URL", value=JUPYTERHUB_BASE_URL),
            client.V1EnvVar(
                name="JUPYTERHUB_SERVICE_PREFIX",
                value=f"/services/{self.course_id}/",
            ),
            client.V1EnvVar(
                name="JUPYTERHUB_CLIENT_ID", value=f"service-{self.course_id}"
            ),
            client.V1EnvVar(name="JUPYTERHUB_USER", value=self.grader_name),
            client.V1EnvVar(name="NAMESPACE", value=str(NAMESPACE)),
            client.V1EnvVar(name="NB_UID", value=str(NB_UID)),
            client.V1EnvVar(name="NB_GID", value=str(NB_GID)),
            client.V1EnvVar(name="NB_USER", value=self.grader_name),
        ],
        volume_mounts=[
            client.V1VolumeMount(
                mount_path=f"/home/{self.grader_name}",
                name=GRADER_PVC,
                sub_path=sub_path_grader_home,
            ),
            client.V1VolumeMount(
                mount_path="/srv/nbgrader/exchange",
                name=GRADER_EXCHANGE_SHARED_PVC,
                sub_path=sub_path_exchange,
            ),
        ],
    )
    # Create and configure a spec section
    template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(
            labels={"component": self.grader_name, "app": "illumidesk"}
        ),
        spec=client.V1PodSpec(
            containers=[container],
            security_context=client.V1PodSecurityContext(run_as_user=0),
            volumes=[
                client.V1Volume(
                    name=GRADER_PVC,
                    persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                        claim_name=GRADER_PVC
                    ),
                ),
                client.V1Volume(
                    name=GRADER_EXCHANGE_SHARED_PVC,
                    persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                        claim_name=GRADER_EXCHANGE_SHARED_PVC
                    ),
                ),
            ],
        ),
    )
    # Create the specification of deployment
    spec = client.V1DeploymentSpec(
        replicas=1,
        template=template,
        selector={"matchLabels": {"component": self.grader_name}},
    )
    # Instantiate the deployment object
    deployment = client.V1Deployment(
        api_version="apps/v1",
        kind="Deployment",
        metadata=client.V1ObjectMeta(name=self.grader_name),
        spec=spec,
    )
    return deployment
def create_upgrade_job(site_name, base_pvc_name):
    not_set = "NOT_SET"
    if not site_name or not base_pvc_name:
        frappe.local.response["http_status_code"] = 400
        return {
            "site_name": site_name or not_set,
            "base_pvc_name": base_pvc_name or not_set,
        }
    k8s_settings = frappe.get_single("K8s Bench Settings")
    if (
        not k8s_settings.namespace
        or not k8s_settings.nginx_image
        or not k8s_settings.python_image
        or not k8s_settings.pvc_name
    ):
        out = {
            "namespace": k8s_settings.namespace or not_set,
            "nginx_image": k8s_settings.nginx_image or not_set,
            "python_image": k8s_settings.python_image or not_set,
            "pvc_name": k8s_settings.pvc_name or not_set,
        }
        frappe.local.response["http_status_code"] = 501
        return out
    job_name = f"{UPGRADE_SITE}-{site_name}"
    load_config()
    batch_v1_api = client.BatchV1Api()
    body = client.V1Job(api_version="batch/v1", kind="Job")
    body.metadata = client.V1ObjectMeta(namespace=k8s_settings.namespace, name=job_name)
    body.status = client.V1JobStatus()
    body.spec = client.V1JobSpec(
        template=client.V1PodTemplateSpec(
            spec=client.V1PodSpec(
                init_containers=[
                    client.V1Container(
                        name="populate-assets",
                        image=k8s_settings.nginx_image,
                        command=["/bin/bash", "-c"],
                        args=["rsync -a --delete /var/www/html/assets/frappe /assets"],
                        volume_mounts=[
                            client.V1VolumeMount(
                                name="assets-cache", mount_path="/assets"
                            ),
                        ],
                    )
                ],
                security_context=client.V1PodSecurityContext(
                    supplemental_groups=[1000]
                ),
                containers=[
                    client.V1Container(
                        name="upgrade-site",
                        image=k8s_settings.python_image,
                        command=["/home/frappe/frappe-bench/env/bin/python"],
                        args=["/home/frappe/frappe-bench/commands/upgrade_site.py"],
                        volume_mounts=[
                            client.V1VolumeMount(
                                name=SITES_DIR,
                                mount_path="/home/frappe/frappe-bench/sites",
                            ),
                            client.V1VolumeMount(
                                name=BASE_SITES_DIR, mount_path="/opt/base-sites"
                            ),
                            client.V1VolumeMount(
                                name=UPGRADE_SITE,
                                mount_path="/home/frappe/frappe-bench/commands",
                            ),
                            client.V1VolumeMount(
                                name=ASSETS_CACHE, mount_path="/assets"
                            ),
                        ],
                        env=[
                            client.V1EnvVar(name="SITE_NAME", value=site_name),
                            client.V1EnvVar(
                                name="FROM_BENCH_PATH", value="/opt/base-sites"
                            ),
                        ],
                    )
                ],
                restart_policy="Never",
                volumes=[
                    client.V1Volume(
                        name=SITES_DIR,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=k8s_settings.pvc_name, read_only=False
                        ),
                    ),
                    client.V1Volume(
                        name=BASE_SITES_DIR,
                        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=base_pvc_name,
                            read_only=False,
                        ),
                    ),
                    client.V1Volume(
                        name=UPGRADE_SITE,
                        config_map=client.V1ConfigMapVolumeSource(name=UPGRADE_SITE),
                    ),
                    client.V1Volume(
                        name=ASSETS_CACHE, empty_dir=client.V1EmptyDirVolumeSource()
                    ),
                ],
            )
        )
    )
    try:
        api_response = batch_v1_api.create_namespaced_job(
            k8s_settings.namespace, body, pretty=True
        )
        return job_name + " created"
    except (ApiException, Exception) as e:
        status_code = getattr(e, "status", 500)
        out = {
            "error": e,
            "params": {"site_name": site_name, "base_pvc_name": base_pvc_name},
        }
        # getattr needs a default here, otherwise a non-ApiException error
        # would raise AttributeError while this handler is running.
        reason = getattr(e, "reason", None)
        if reason:
            out["reason"] = reason
        frappe.log_error(out, "Exception: BatchV1Api->create_namespaced_job")
        frappe.local.response["http_status_code"] = status_code
        return out