def sanitize_resources(
    resources: Union[k8s_schemas.V1ResourceRequirements, Dict]
) -> Optional[k8s_schemas.V1ResourceRequirements]:
    """Normalize a resources spec into a ``V1ResourceRequirements``.

    Every value found under ``limits`` and ``requests`` is converted with
    ``str()``. The converted sections are freshly built dicts, so the
    ``resources`` object passed in by the caller is left unmodified.

    Args:
        resources: Either a ``V1ResourceRequirements`` instance or a dict
            with optional ``"limits"`` / ``"requests"`` keys.

    Returns:
        ``None`` when ``resources`` is falsy, otherwise a new
        ``V1ResourceRequirements`` carrying the stringified sections.
    """

    def validate_resources(r_field: Optional[Dict]) -> Optional[Dict]:
        # Falsy sections (``None`` or empty) are passed through untouched.
        if not r_field:
            return r_field
        # Build a copy instead of assigning into ``r_field``: writing the
        # stringified values back would silently rewrite the caller's data.
        return {key: str(r_field[key]) for key in r_field}

    if not resources:
        return None

    if isinstance(resources, dict):
        limits = resources.get("limits", None)
        requests = resources.get("requests", None)
    else:
        limits = resources.limits
        requests = resources.requests

    return k8s_schemas.V1ResourceRequirements(
        limits=validate_resources(limits),
        requests=validate_resources(requests),
    )
def test_requests_gpu(self):
    """Check ``requests_gpu`` for a CPU-only spec and two GPU specs."""
    # A spec that only limits CPU must not be reported as requesting a GPU.
    cpu_only = k8s_schemas.V1ResourceRequirements(limits={"cpu": 1})
    assert requests_gpu(cpu_only) is False

    # A GPU key under ``limits`` is detected.
    amd_limit = k8s_schemas.V1ResourceRequirements(limits={"amd.com/gpu": 1})
    assert requests_gpu(amd_limit) is True

    # A GPU key under ``requests`` is detected as well.
    nvidia_request = k8s_schemas.V1ResourceRequirements(
        requests={"nvidia.com/gpu": 1}
    )
    assert requests_gpu(nvidia_request) is True
def test_requests_tpu(self):
    """Check ``requests_tpu`` for a CPU-only spec and two TPU specs."""
    # A spec that only limits CPU must not be reported as requesting a TPU.
    cpu_only = k8s_schemas.V1ResourceRequirements(limits={"cpu": 1})
    assert requests_tpu(cpu_only) is False

    # A TPU key under ``limits`` is detected.
    tpu_limit = k8s_schemas.V1ResourceRequirements(
        limits={"cloud-tpus.google.com/v2": 1}
    )
    assert requests_tpu(tpu_limit) is True

    # A TPU key under ``requests`` is detected as well.
    # NOTE(review): this key ends with a stray ":" -- looks like a typo in
    # the test data; kept as-is here, confirm whether it is intentional.
    tpu_request = k8s_schemas.V1ResourceRequirements(
        requests={"cloud-tpus.google.com/v2:": 32}
    )
    assert requests_tpu(tpu_request) is True
def get_default_notification_container():
    """Return the default ``V1Container`` that runs ``polyaxon notify``.

    The container uses the ``dev`` tag of the events-handlers image with an
    "always" pull policy, and carries fixed CPU/memory limits and requests.
    """
    image = "polyaxon/polyaxon-events-handlers:{}".format("dev")

    # Double-brace placeholders are emitted verbatim by this function.
    notify_args = [
        "--kind={{kind}}",
        "--owner={{owner}}",
        "--project={{project}}",
        "--run_uuid={{run_uuid}}",
        "--run_name={{run_name}}",
        "--condition={{condition}}",
    ]

    resource_spec = k8s_schemas.V1ResourceRequirements(
        limits={"cpu": "0.5", "memory": "100Mi"},
        requests={"cpu": "0.1", "memory": "20Mi"},
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=["polyaxon", "notify"],
        args=notify_args,
        resources=resource_spec,
    )
def get_default_notification_container():
    """Return the default ``V1Container`` that runs ``polyaxon notify``.

    The image tag is taken from ``pkg.VERSION`` and the pull policy is
    "if not present"; CPU/memory limits and requests are fixed.
    """
    image = "polyaxon/polyaxon-events-handlers:{}".format(pkg.VERSION)

    # Double-brace placeholders are emitted verbatim by this function.
    notify_args = [
        "--kind={{kind}}",
        "--owner={{owner}}",
        "--project={{project}}",
        "--run-uuid={{run_uuid}}",
        "{{params.condition.as_arg}}",
        "{{params.run_name.as_arg}}",
    ]

    resource_spec = k8s_schemas.V1ResourceRequirements(
        limits={"cpu": "0.5", "memory": "100Mi"},
        requests={"cpu": "0.1", "memory": "20Mi"},
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["polyaxon", "notify"],
        args=notify_args,
        resources=resource_spec,
    )
def get_batch_cleaner_container(
    store: V1ConnectionType,
    paths: List[str],
):
    """Return a ``V1Container`` that cleans several artifact sub-paths.

    Each entry of ``paths`` is joined onto ``store.store_path`` and passed
    as a ``-sp=<path>`` flag to ``polyaxon clean-artifacts``. The whole
    command is executed through ``/bin/bash -c``.

    Args:
        store: Connection whose ``store_path`` is the base directory and
            whose ``kind`` (underscores replaced by dashes) selects the
            ``clean-artifacts`` sub-command.
        paths: Sub-paths, relative to ``store.store_path``, to clean.

    Returns:
        The configured ``V1Container``.
    """
    # Local import keeps this block self-contained.
    import shlex

    # The command line is interpreted by bash, so every path is shell-quoted:
    # an unquoted path containing whitespace or shell metacharacters would
    # be split or interpreted. Ordinary paths are emitted unchanged.
    subpath_flags = " ".join(
        "-sp={}".format(shlex.quote(os.path.join(store.store_path, subpath)))
        for subpath in paths
    )
    clean_args = "polyaxon clean-artifacts {} {}".format(
        store.kind.replace("_", "-"), subpath_flags
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["/bin/bash", "-c"],
        args=[clean_args],
        resources=k8s_schemas.V1ResourceRequirements(
            limits={"cpu": "0.5", "memory": "160Mi"},
            requests={"cpu": "0.1", "memory": "80Mi"},
        ),
    )
def test_get_main_container_simple_params(self):
    """Check ``get_main_container`` with only the basic parameters set.

    All optional inputs are passed as ``None``; the result is expected to
    carry the new name, the main container's image/command/args/resources,
    the given volume mounts, one port and the log-level env var.
    """
    mounts = [
        k8s_schemas.V1VolumeMount(
            name="test", mount_path="/mount_test", read_only=True
        )
    ]
    expected_resources = k8s_schemas.V1ResourceRequirements(
        requests={"cpu": "1", "memory": "256Mi"},
        limits={"cpu": "1", "memory": "256Mi"},
    )
    main_spec = k8s_schemas.V1Container(
        name="main",
        image="job_docker_image",
        image_pull_policy="IfNotPresent",
        command=["cmd", "-p", "-c"],
        args=["arg1", "arg2"],
        resources=expected_resources,
    )

    built = get_main_container(
        container_id="new-name",
        main_container=main_spec,
        contexts=None,
        volume_mounts=mounts,
        log_level="info",
        artifacts_store=None,
        init=None,
        connection_by_names=None,
        connections=None,
        secrets=None,
        config_maps=None,
        kv_env_vars=None,
        env=None,
        ports=23,
        run_path=None,
    )

    # Identity and image settings.
    assert built.name == "new-name"
    assert built.image == "job_docker_image"
    assert built.image_pull_policy == "IfNotPresent"

    # Entry point is taken over unchanged.
    assert built.command == ["cmd", "-p", "-c"]
    assert built.args == ["arg1", "arg2"]

    # The single integer port becomes a one-element list of container ports.
    expected_ports = [k8s_schemas.V1ContainerPort(container_port=23)]
    assert built.ports == expected_ports

    # Only the log level shows up in the environment.
    expected_env = [get_env_var(name=POLYAXON_KEYS_LOG_LEVEL, value="info")]
    assert built.env == expected_env
    assert built.env_from == []

    assert built.resources == expected_resources
    assert built.volume_mounts == mounts
def get_init_resources() -> k8s_schemas.V1ResourceRequirements:
    """Return the fixed resource requirements: limits and requests for CPU and memory."""
    upper_bounds = {"cpu": "1", "memory": "200Mi"}
    baseline = {"cpu": "0.1", "memory": "20Mi"}
    return k8s_schemas.V1ResourceRequirements(
        limits=upper_bounds,
        requests=baseline,
    )
def test_get_init_resources(self):
    """Check that ``get_init_resources`` returns the expected fixed spec."""
    expected = k8s_schemas.V1ResourceRequirements(
        limits={"cpu": "1", "memory": "200Mi"},
        requests={"cpu": "0.1", "memory": "20Mi"},
    )
    actual = get_init_resources()
    assert actual == expected
def test_get_resources_env_vars(self):
    """Check the env vars produced for GPU-less and GPU resource specs."""

    def assert_devices_hidden(found):
        # Exactly one variable, setting NVIDIA_VISIBLE_DEVICES to "none".
        assert len(found) == 1
        assert found[0].name == "NVIDIA_VISIBLE_DEVICES"
        assert found[0].value == "none"

    # No resources at all.
    assert_devices_hidden(get_resources_env_vars(None))

    # CPU-only limits.
    cpu_limit = k8s_schemas.V1ResourceRequirements(limits={"cpu": 1})
    assert_devices_hidden(get_resources_env_vars(cpu_limit))

    # Memory-only limits.
    memory_limit = k8s_schemas.V1ResourceRequirements(limits={"memory": 1})
    assert_devices_hidden(get_resources_env_vars(memory_limit))

    # A GPU request yields no env vars.
    gpu_request = k8s_schemas.V1ResourceRequirements(
        requests={"nvidia.com/gpu": 1}
    )
    found = get_resources_env_vars(gpu_request)
    assert len(found) == 0
    assert found == []
def get_default_tuner_container(command):
    """Return the default tuner ``V1Container`` for the given command.

    The container uses the ``dev`` tag of the hpsearch image with an
    "always" pull policy and sets resource requests only (no limits).

    Args:
        command: Value passed through as the container's ``command``.
    """
    image = "polyaxon/polyaxon-hpsearch:{}".format("dev")

    # Double-brace placeholders are emitted verbatim by this function.
    tuner_args = [
        "--parallel={{parallel}}",
        "--configs={{configs}}",
        "--metrics={{metrics}}",
    ]

    resource_spec = k8s_schemas.V1ResourceRequirements(
        requests={"cpu": "0.1", "memory": "180Mi"},
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image=image,
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=command,
        args=tuner_args,
        resources=resource_spec,
    )
def requests_tpu(
    resources: Union[k8s_schemas.V1ResourceRequirements, Dict]
) -> bool:
    """Tell whether a resources spec mentions a TPU.

    Args:
        resources: A ``V1ResourceRequirements`` instance, or a dict that is
            expanded as keyword arguments into one.

    Returns:
        ``True`` if any key under ``requests`` or ``limits`` contains the
        substring ``"tpu"``; ``False`` otherwise, including when
        ``resources`` is falsy.
    """
    if not resources:
        return False

    spec = resources
    if not isinstance(spec, k8s_schemas.V1ResourceRequirements):
        spec = k8s_schemas.V1ResourceRequirements(**spec)

    # Requests are inspected first, then limits; empty/None sections are skipped.
    for section in (spec.requests, spec.limits):
        if section and any("tpu" in name for name in section.keys()):
            return True
    return False
def get_default_tuner_container(command, bracket_iteration: int = None):
    """Return the default tuner ``V1Container`` for the given command.

    The image tag is taken from ``pkg.VERSION`` with an "if not present"
    pull policy; only resource requests are set (no limits).

    Args:
        command: Value passed through as the container's ``command``.
        bracket_iteration: When not ``None``, an extra
            ``bracket_iteration`` placeholder argument is appended. Only
            its presence matters here; the value itself is not used.
    """
    # Double-brace placeholders are emitted verbatim by this function.
    base_args = [
        "{{params.matrix.as_arg}}",
        "{{params.search.as_arg}}",
        "{{params.iteration.as_arg}}",
    ]
    if bracket_iteration is None:
        tuner_args = base_args
    else:
        tuner_args = base_args + ["{{params.bracket_iteration.as_arg}}"]

    resource_spec = k8s_schemas.V1ResourceRequirements(
        requests={"cpu": "0.1", "memory": "180Mi"},
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-hpsearch:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=command,
        args=tuner_args,
        resources=resource_spec,
    )
def get_default_cleaner_container(store: V1ConnectionType, run_path: str):
    """Return a ``V1Container`` that cleans one run's artifacts.

    Args:
        store: Connection whose ``store_path`` is the base directory and
            whose ``kind`` (underscores replaced by dashes) selects the
            ``clean-artifacts`` sub-command.
        run_path: Path joined onto ``store.store_path`` to form the
            ``--subpath`` argument.
    """
    target = os.path.join(store.store_path, run_path)
    store_kind = store.kind.replace('_', '-')

    resource_spec = k8s_schemas.V1ResourceRequirements(
        limits={"cpu": "0.5", "memory": "100Mi"},
        requests={"cpu": "0.1", "memory": "20Mi"},
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.ALWAYS.value,
        command=["polyaxon", "clean-artifacts", store_kind],
        args=["--subpath={}".format(target)],
        resources=resource_spec,
    )
def get_default_cleaner_container(store: V1ConnectionType,
                                  run_uuid: str,
                                  run_kind: str):
    """Return a ``V1Container`` that waits for a run, then cleans its artifacts.

    The container runs ``polyaxon wait ... && polyaxon clean-artifacts ...``
    through ``/bin/bash -c``; the ``&&`` means the clean step is only
    started when the wait command exits successfully.

    Args:
        store: Connection whose ``store_path`` is the base directory and
            whose ``kind`` (underscores replaced by dashes) selects the
            ``clean-artifacts`` sub-command.
        run_uuid: Run identifier; used for ``--uuid`` and joined onto
            ``store.store_path`` to form ``--subpath``.
        run_kind: Value passed as ``--kind`` to ``polyaxon wait``.

    Returns:
        The configured ``V1Container``.
    """
    # Local import keeps this block self-contained.
    import shlex

    subpath = os.path.join(store.store_path, run_uuid)

    # The command line is interpreted by bash, so interpolated values are
    # shell-quoted: unquoted whitespace or shell metacharacters would split
    # or alter the command. Ordinary values are emitted unchanged.
    clean_args = "polyaxon clean-artifacts {} --subpath={}".format(
        store.kind.replace("_", "-"), shlex.quote(subpath)
    )
    wait_args = "polyaxon wait --uuid={} --kind={}".format(
        shlex.quote(run_uuid), shlex.quote(run_kind)
    )

    return V1Container(
        name=MAIN_JOB_CONTAINER,
        image="polyaxon/polyaxon-init:{}".format(pkg.VERSION),
        image_pull_policy=PullPolicy.IF_NOT_PRESENT.value,
        command=["/bin/bash", "-c"],
        args=["{} && {}".format(wait_args, clean_args)],
        resources=k8s_schemas.V1ResourceRequirements(
            limits={"cpu": "0.5", "memory": "160Mi"},
            requests={"cpu": "0.1", "memory": "80Mi"},
        ),
    )