def test_get_pod_volumes(self):
    settings.DATA_CLAIM_NAME = 'test-claim-data'
    settings.OUTPUTS_CLAIM_NAME = 'test-claim-outputs'
    volumes, volume_mounts = get_pod_volumes()
    assert len(volumes) == 2
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-data'
    assert volumes[1].persistent_volume_claim.claim_name == 'test-claim-outputs'

    settings.DATA_CLAIM_NAME = 'test-claim-data'
    settings.OUTPUTS_CLAIM_NAME = 'test-claim-outputs'
    settings.EXTRA_PERSISTENCES = [{
        'mountPath': '/storage/1',
        'existingClaim': 'test-claim-extra-1'
    }, {
        'mountPath': '/storage/2',
        'hostPath': '/root/test'
    }]
    volumes, volume_mounts = get_pod_volumes()
    assert len(volumes) == 4
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-data'
    assert volumes[1].persistent_volume_claim.claim_name == 'test-claim-outputs'
    assert volumes[2].persistent_volume_claim.claim_name == 'test-claim-extra-1'
    assert volumes[3].host_path.path == '/root/test'
    assert volume_mounts[2].mount_path == '/storage/1'
    assert volume_mounts[3].mount_path == '/storage/2'
def test_get_pod_volumes_with_specified_values(self):
    volumes, volume_mounts = get_pod_volumes(persistence_outputs='outputs2',
                                             persistence_data=['data2'])
    assert len(volumes) == 2
    assert volumes[0].host_path.path == '/root/outputs'
    assert volumes[1].host_path.path == '/root/data'
    assert volume_mounts[0].mount_path == '/outputs/2'
    assert volume_mounts[1].mount_path == '/data/2'

    volumes, volume_mounts = get_pod_volumes(persistence_outputs='outputs1',
                                             persistence_data=['data1', 'data2'])
    assert len(volumes) == 3
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs-1'
    data_claim_name = None
    data_host_path = None
    mount_path1 = None
    mount_path2 = None
    if volumes[1].name == 'data1':
        data_claim_name = volumes[1].persistent_volume_claim.claim_name
        data_host_path = volumes[2].host_path.path
        mount_path1 = volume_mounts[1].mount_path
        mount_path2 = volume_mounts[2].mount_path
    elif volumes[1].name == 'data2':
        data_host_path = volumes[1].host_path.path
        data_claim_name = volumes[2].persistent_volume_claim.claim_name
        mount_path2 = volume_mounts[1].mount_path
        mount_path1 = volume_mounts[2].mount_path
    assert data_claim_name == 'test-claim-data-1'
    assert data_host_path == '/root/data'
    assert mount_path1 == '/data/1'
    assert mount_path2 == '/data/2'
def test_default_get_pod_volumes_with_updated_settings(self):
    volumes, volume_mounts = get_pod_volumes(persistence_outputs='outputs1',
                                             persistence_data=None)
    assert len(volumes) == 3  # Data3 won't be included because it's a bucket
    if volumes[0].name == 'outputs1':
        assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs-1'
        assert volume_mounts[0].mount_path == '/outputs/1'
    elif volumes[0].name == 'outputs2':
        assert volumes[0].host_path.path == '/root/outputs'
        assert volume_mounts[0].mount_path == '/outputs/2'
    data_claim_name = None
    data_host_path = None
    mount_path1 = None
    mount_path2 = None
    if volumes[1].name == 'data1':
        data_claim_name = volumes[1].persistent_volume_claim.claim_name
        data_host_path = volumes[2].host_path.path
        mount_path1 = volume_mounts[1].mount_path
        mount_path2 = volume_mounts[2].mount_path
    elif volumes[1].name == 'data2':
        data_host_path = volumes[1].host_path.path
        data_claim_name = volumes[2].persistent_volume_claim.claim_name
        mount_path2 = volume_mounts[1].mount_path
        mount_path1 = volume_mounts[2].mount_path
    assert data_claim_name == 'test-claim-data-1'
    assert data_host_path == '/root/data'
    assert mount_path1 == '/data/1'
    assert mount_path2 == '/data/2'
def test_default_get_pod_volumes(self):
    volumes, _ = get_pod_volumes(persistence_outputs=None, persistence_data=None)
    assert len(volumes) == 2
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs'
    assert volumes[1].persistent_volume_claim.claim_name == 'test-claim-data'
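# Note (not from the source): the expectations in the tests above appear to rely on a
# persistence fixture roughly like the sketch below. The setting names and exact schema
# are assumptions for illustration only; only the claim names, host paths, and mount
# paths asserted above are taken from the tests themselves.
#
# PERSISTENCE_OUTPUTS = {
#     'outputs1': {'mountPath': '/outputs/1', 'existingClaim': 'test-claim-outputs-1'},
#     'outputs2': {'mountPath': '/outputs/2', 'hostPath': '/root/outputs'},
#     'outputs3': {'store': 's3', 'bucket': 's3://some-bucket'},  # bucket store: no pod volume
# }
# PERSISTENCE_DATA = {
#     'data1': {'mountPath': '/data/1', 'existingClaim': 'test-claim-data-1'},
#     'data2': {'mountPath': '/data/2', 'hostPath': '/root/data'},
#     'data3': {'store': 's3', 'bucket': 's3://some-bucket'},  # bucket store: no pod volume
# }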
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                restart_policy='Never'):
    job_name = self.pod_manager.get_job_name(task_type=task_type, task_idx=task_idx)
    sidecar_args = get_sidecar_args(pod_id=job_name)
    labels = self.pod_manager.get_labels(task_type=task_type, task_idx=task_idx)

    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_jobs,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_experiments,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts

    pod = self.pod_manager.get_pod(
        task_type=task_type,
        task_idx=task_idx,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        sidecar_args=sidecar_args,
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        resources=resources,
        node_selector=node_selector,
        restart_policy=restart_policy)
    pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
    results = {'pod': pod_resp.to_dict()}
    if add_service:
        service = services.get_service(namespace=self.namespace,
                                       name=job_name,
                                       labels=labels,
                                       ports=self.pod_manager.ports,
                                       target_ports=self.pod_manager.ports)
        service_resp, _ = self.create_or_update_service(name=job_name, data=service)
        results['service'] = service_resp.to_dict()
    return results
def start_job(self,
              persistence_outputs=None,
              persistence_data=None,
              outputs_refs_jobs=None,
              outputs_refs_experiments=None,
              resources=None,
              node_selector=None,
              affinity=None,
              tolerations=None):
    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(persistence_outputs=persistence_outputs,
                                             persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    command, args = self.get_pod_command_args()
    resource_name = self.resource_manager.get_resource_name()
    pod = self.resource_manager.get_pod(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=self.resource_manager.labels,
        env_vars=None,
        command=command,
        args=args,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=self.spec.secret_refs,
        config_map_refs=self.spec.config_map_refs,
        resources=resources,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        restart_policy='Never')
    pod_resp, _ = self.create_or_update_pod(name=resource_name, data=pod)
    return pod_resp.to_dict()
def start_job(self,
              persistence_outputs=None,
              persistence_data=None,
              outputs_refs_jobs=None,
              outputs_refs_experiments=None,
              resources=None,
              node_selector=None,
              affinity=None,
              tolerations=None):
    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    # Validate secret and configmap refs
    secret_refs = validate_secret_refs(self.spec.secret_refs)
    configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

    command, args = self.get_pod_command_args()
    pod = self.pod_manager.get_pod(
        volume_mounts=volume_mounts,
        volumes=volumes,
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        env_vars=None,
        command=command,
        args=args,
        resources=resources,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        restart_policy='Never')
    pod_resp, _ = self.create_or_update_pod(name=self.pod_manager.k8s_job_name,
                                            data=pod)
    return pod_resp.to_dict()
def start_job(self, resources=None, node_selectors=None):
    volumes, volume_mounts = get_pod_volumes()
    command, args = self.get_pod_command_args()
    env_vars = self.get_env_vars()
    pod_resp = self.pod_manager.get_pod(volume_mounts=volume_mounts,
                                        volumes=volumes,
                                        env_vars=env_vars,
                                        command=command,
                                        args=args,
                                        resources=resources,
                                        node_selector=node_selectors,
                                        restart_policy='Never')
    return pod_resp.to_dict()
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                sidecar_args_fn=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                restart_policy='Never'):
    job_name = self.pod_manager.get_job_name(task_type=task_type, task_idx=task_idx)
    sidecar_args = sidecar_args_fn(pod_id=job_name)
    labels = self.pod_manager.get_labels(task_type=task_type, task_idx=task_idx)
    volumes, volume_mounts = get_pod_volumes()
    pod = self.pod_manager.get_pod(task_type=task_type,
                                   task_idx=task_idx,
                                   volume_mounts=volume_mounts,
                                   volumes=volumes,
                                   env_vars=env_vars,
                                   command=command,
                                   args=args,
                                   sidecar_args=sidecar_args,
                                   resources=resources,
                                   node_selector=node_selector,
                                   restart_policy=restart_policy)
    pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
    service = services.get_service(namespace=self.namespace,
                                   name=job_name,
                                   labels=labels,
                                   ports=self.pod_manager.ports,
                                   target_ports=self.pod_manager.ports)
    results = {'pod': pod_resp.to_dict()}
    if add_service:
        service_resp, _ = self.create_or_update_service(name=job_name, data=service)
        results['service'] = service_resp.to_dict()
    return results
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                restart_policy='Never'):
    job_name = self.pod_manager.get_job_name(task_type=task_type, task_idx=task_idx)
    sidecar_args = get_sidecar_args(pod_id=job_name)
    labels = self.pod_manager.get_labels(task_type=task_type, task_idx=task_idx)
    volumes, volume_mounts = get_pod_volumes()
    pod = self.pod_manager.get_pod(task_type=task_type,
                                   task_idx=task_idx,
                                   volume_mounts=volume_mounts,
                                   volumes=volumes,
                                   env_vars=env_vars,
                                   command=command,
                                   args=args,
                                   sidecar_args=sidecar_args,
                                   resources=resources,
                                   node_selector=node_selector,
                                   restart_policy=restart_policy)
    pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
    service = services.get_service(namespace=self.namespace,
                                   name=job_name,
                                   labels=labels,
                                   ports=self.pod_manager.ports,
                                   target_ports=self.pod_manager.ports)
    results = {'pod': pod_resp.to_dict()}
    if add_service:
        service_resp, _ = self.create_or_update_service(name=job_name, data=service)
        results['service'] = service_resp.to_dict()
    return results
def start_notebook(self,
                   image,
                   persistence_outputs=None,
                   persistence_data=None,
                   outputs_refs_jobs=None,
                   outputs_refs_experiments=None,
                   resources=None,
                   secret_refs=None,
                   configmap_refs=None,
                   node_selector=None,
                   affinity=None,
                   tolerations=None,
                   allow_commits=False):
    ports = [self.request_notebook_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    env_vars = get_job_env_vars(
        persistence_outputs=persistence_outputs,
        outputs_path=get_notebook_job_outputs_path(
            persistence_outputs=persistence_outputs,
            notebook_job=self.job_name),
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments)
    secret_refs = validate_secret_refs(secret_refs)
    configmap_refs = validate_configmap_refs(configmap_refs)
    env_from = get_pod_env_from(secret_refs=secret_refs, configmap_refs=configmap_refs)
    code_volume, code_volume_mount = self.get_notebook_code_volume()
    volumes.append(code_volume)
    volume_mounts.append(code_volume_mount)
    deployment_name = JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME,
                                             job_uuid=self.job_uuid)
    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS)
    affinity = get_affinity(affinity=affinity,
                            default_affinity=settings.AFFINITY_EXPERIMENTS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_EXPERIMENTS)
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_NOTEBOOK,
        name=NOTEBOOK_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=self.get_notebook_args(deployment_name=deployment_name,
                                    ports=ports,
                                    allow_commits=allow_commits),
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        env_vars=env_vars,
        env_from=env_from,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER,
        service_account_name=settings.K8S_SERVICE_ACCOUNT_EXPERIMENTS)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_NOTEBOOK,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path': '/notebook/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def _create_job(self,  # pylint:disable=arguments-differ
                task_type,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                affinity=None,
                tolerations=None,
                replicas=1,
                restart_policy='Never'):
    ephemeral_token = None
    if self.token_scope:
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)
    resource_name = self.resource_manager.get_kf_resource_name(task_type=task_type)
    labels = self.resource_manager.get_labels(task_type=task_type)

    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_jobs,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_experiments,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    # Validate secret and configmap refs
    secret_refs = validate_secret_refs(self.spec.secret_refs)
    configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

    pod_template_spec = self.resource_manager.get_pod_template_spec(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        ports=self.ports,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        restart_policy=restart_policy)
    return {
        'replicas': replicas,
        'restartPolicy': restart_policy,
        'template': pod_template_spec
    }
def start_job(self,
              container_cmd_callback,
              persistence_outputs=None,
              persistence_data=None,
              outputs_refs_jobs=None,
              outputs_refs_experiments=None,
              resources=None,
              labels=None,
              annotations=None,
              secret_refs=None,
              config_map_refs=None,
              node_selector=None,
              affinity=None,
              tolerations=None,
              reconcile_url=None,
              max_restarts=None):
    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    command, args = container_cmd_callback()
    resource_name = self.resource_manager.get_resource_name()
    labels = get_labels(default_labels=self.resource_manager.labels, labels=labels)
    pod = self.resource_manager.get_pod(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=None,
        command=command,
        args=args,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=secret_refs,
        config_map_refs=config_map_refs,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=get_pod_restart_policy(max_restarts))
    pod_resp, _ = self.create_or_update_pod(name=resource_name, body=pod, reraise=True)
    return pod_resp.to_dict()
def start_tensorboard(self, image, outputs_path, resources=None, node_selectors=None):
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes()
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_TENSORBOARD,
        name=self.TENSORBOARD_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=["tensorboard --logdir={} --port={}".format(outputs_path, self.PORT)],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selectors,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    deployment_name = constants.JOB_NAME.format(name=self.TENSORBOARD_JOB_NAME,
                                                job_uuid=self.job_uuid)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_TENSORBOARD,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path': '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def test_pod_volumes_changes(self):
    volumes, volume_mounts = get_pod_volumes(persistence_outputs=None,
                                             persistence_data=None)
    assert len(volumes) == 3
    if volumes[0].name == 'outputs1':
        assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs-1'
        assert volume_mounts[0].mount_path == '/outputs/1'
    elif volumes[0].name == 'outputs2':
        assert volumes[0].host_path.path == '/root/outputs'
        assert volume_mounts[0].mount_path == '/outputs/2'
    data_claim_name = None
    data_host_path = None
    mount_path1 = None
    mount_path2 = None
    if volumes[1].name == 'data1':
        data_claim_name = volumes[1].persistent_volume_claim.claim_name
        data_host_path = volumes[2].host_path.path
        mount_path1 = volume_mounts[1].mount_path
        mount_path2 = volume_mounts[2].mount_path
    elif volumes[1].name == 'data2':
        data_host_path = volumes[1].host_path.path
        data_claim_name = volumes[2].persistent_volume_claim.claim_name
        mount_path2 = volume_mounts[1].mount_path
        mount_path1 = volume_mounts[2].mount_path
    assert data_claim_name == 'test-claim-data-1'
    assert data_host_path == '/root/data'
    assert mount_path1 == '/data/1'
    assert mount_path2 == '/data/2'

    with self.assertRaises(VolumeNotFoundError):
        get_pod_volumes(persistence_outputs='foo', persistence_data=None)
    with self.assertRaises(VolumeNotFoundError):
        get_pod_volumes(persistence_outputs=None, persistence_data='foo')

    volumes, volume_mounts = get_pod_volumes(persistence_outputs='outputs2',
                                             persistence_data=['data2'])
    assert len(volumes) == 2
    assert volumes[0].host_path.path == '/root/outputs'
    assert volumes[1].host_path.path == '/root/data'
    assert volume_mounts[0].mount_path == '/outputs/2'
    assert volume_mounts[1].mount_path == '/data/2'

    volumes, volume_mounts = get_pod_volumes(persistence_outputs='outputs1',
                                             persistence_data=['data1', 'data2'])
    assert len(volumes) == 3
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs-1'
    data_claim_name = None
    data_host_path = None
    mount_path1 = None
    mount_path2 = None
    if volumes[1].name == 'data1':
        data_claim_name = volumes[1].persistent_volume_claim.claim_name
        data_host_path = volumes[2].host_path.path
        mount_path1 = volume_mounts[1].mount_path
        mount_path2 = volume_mounts[2].mount_path
    elif volumes[1].name == 'data2':
        data_host_path = volumes[1].host_path.path
        data_claim_name = volumes[2].persistent_volume_claim.claim_name
        mount_path2 = volume_mounts[1].mount_path
        mount_path1 = volume_mounts[2].mount_path
    assert data_claim_name == 'test-claim-data-1'
    assert data_host_path == '/root/data'
    assert mount_path1 == '/data/1'
    assert mount_path2 == '/data/2'
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                annotations=None,
                node_selector=None,
                affinity=None,
                tolerations=None,
                max_restarts=None):
    ephemeral_token = None
    if self.token_scope:
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)
    resource_name = self.resource_manager.get_resource_name(task_type=task_type,
                                                            task_idx=task_idx)
    job_uuid = self.get_job_uuids(task_type=task_type, task_idx=task_idx)
    reconcile_url = get_experiment_reconcile_url(self.experiment_name, job_uuid)
    labels = self.get_labels(task_type=task_type, task_idx=task_idx, job_uuid=job_uuid)

    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_jobs,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_experiments,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    pod = self.resource_manager.get_task_pod(
        task_type=task_type,
        task_idx=task_idx,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        ports=self.ports,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=self.spec.secret_refs,
        config_map_refs=self.spec.config_map_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=get_pod_restart_policy(max_restarts))
    pod_resp, _ = self.create_or_update_pod(name=resource_name, body=pod, reraise=True)
    results = {'pod': pod_resp.to_dict()}
    if add_service:
        service = services.get_service(namespace=self.namespace,
                                       name=resource_name,
                                       labels=labels,
                                       ports=self.ports,
                                       target_ports=self.ports)
        service_resp, _ = self.create_or_update_service(name=resource_name,
                                                        body=service,
                                                        reraise=True)
        results['service'] = service_resp.to_dict()
    return results
def test_get_pod_volumes_with_buckets_values_only(self):
    volumes, _ = get_pod_volumes(persistence_outputs='outputs3',
                                 persistence_data=['data3'])
    self.assertEqual(len(volumes), 0)
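# Note (assumption, not from the source): 'outputs3' and 'data3' appear to be
# bucket-backed stores (e.g. object storage such as S3/GCS). get_pod_volumes skips
# such stores, which is why the test above expects an empty volume list; the test
# fixture in the earlier sketch marks them the same way. The exact mechanism by which
# bucket stores are exposed to the container (paths/credentials via env vars rather
# than mounted volumes) is an inference from the "it's a bucket" comment above, not
# something this section shows directly.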
def start_notebook(self,
                   image,
                   persistence_outputs=None,
                   persistence_data=None,
                   outputs_refs_jobs=None,
                   outputs_refs_experiments=None,
                   resources=None,
                   node_selectors=None,
                   allow_commits=False):
    ports = [self.request_notebook_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    env_vars = get_job_env_vars(
        outputs_path=get_notebook_job_outputs_path(
            persistence_outputs=persistence_outputs,
            notebook_job=self.job_name),
        data_paths=get_data_paths(persistence_data),
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments)
    code_volume, code_volume_mount = self.get_notebook_code_volume()
    volumes.append(code_volume)
    volume_mounts.append(code_volume_mount)
    deployment_name = constants.JOB_NAME.format(name=self.NOTEBOOK_JOB_NAME,
                                                job_uuid=self.job_uuid)
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_NOTEBOOK,
        name=self.NOTEBOOK_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=self.get_notebook_args(deployment_name=deployment_name,
                                    ports=ports,
                                    allow_commits=allow_commits),
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        env_vars=env_vars,
        resources=resources,
        node_selector=node_selectors,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_NOTEBOOK,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path': '/notebook/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def start_tensorboard(self, image, resources=None, node_selectors=None):
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes()
    outputs_path = get_project_outputs_path(project_name=self.project_name)
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_TENSORBOARD,
        name=self.TENSORBOARD_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=["tensorboard --logdir={} --port={}".format(outputs_path, self.PORT)],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selectors,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    deployment_name = constants.JOB_NAME.format(name=self.TENSORBOARD_JOB_NAME,
                                                job_uuid=self.job_uuid)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_TENSORBOARD,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path': '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                affinity=None,
                tolerations=None,
                restart_policy='Never'):
    ephemeral_token = RedisEphemeralTokens.generate_header_token(scope=self.token_scope)
    job_name = self.pod_manager.get_job_name(task_type=task_type, task_idx=task_idx)
    sidecar_args = get_sidecar_args(pod_id=job_name)
    labels = self.pod_manager.get_labels(task_type=task_type, task_idx=task_idx)

    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_jobs,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_experiments,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    # Validate secret and configmap refs
    secret_refs = validate_secret_refs(self.spec.secret_refs)
    configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

    pod = self.pod_manager.get_pod(
        task_type=task_type,
        task_idx=task_idx,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        sidecar_args=sidecar_args,
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        restart_policy=restart_policy)
    pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
    results = {'pod': pod_resp.to_dict()}
    if add_service:
        service = services.get_service(namespace=self.namespace,
                                       name=job_name,
                                       labels=labels,
                                       ports=self.pod_manager.ports,
                                       target_ports=self.pod_manager.ports)
        service_resp, _ = self.create_or_update_service(name=job_name, data=service)
        results['service'] = service_resp.to_dict()
    return results
def start_notebook(self, image, resources=None, node_selectors=None):
    ports = [self.request_notebook_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes()
    code_volume, code_volume_mount = self.get_notebook_code_volume()
    volumes.append(code_volume)
    volume_mounts.append(code_volume_mount)
    deployment_name = constants.JOB_NAME.format(name=self.NOTEBOOK_JOB_NAME,
                                                job_uuid=self.job_uuid)
    notebook_token = self.get_notebook_token()
    notebook_url = self._get_proxy_url(namespace=self.namespace,
                                       job_name=self.NOTEBOOK_JOB_NAME,
                                       deployment_name=deployment_name,
                                       port=ports[0])
    notebook_dir = get_project_repos_path(self.project_name)
    notebook_dir = '{}/{}'.format(notebook_dir, notebook_dir.split('/')[-1])
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_NOTEBOOK,
        name=self.NOTEBOOK_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=[
            "jupyter notebook "
            "--no-browser "
            "--port={port} "
            "--ip=0.0.0.0 "
            "--allow-root "
            "--NotebookApp.token={token} "
            "--NotebookApp.trust_xheaders=True "
            "--NotebookApp.base_url={base_url} "
            "--NotebookApp.notebook_dir={notebook_dir} ".format(
                port=self.PORT,
                token=notebook_token,
                base_url=notebook_url,
                notebook_dir=notebook_dir)
        ],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selectors,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_NOTEBOOK,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path': '/notebook/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def start_notebook(self,
                   persistence_outputs=None,
                   persistence_data=None,
                   outputs_refs_jobs=None,
                   outputs_refs_experiments=None,
                   resources=None,
                   secret_refs=None,
                   configmap_refs=None,
                   node_selector=None,
                   affinity=None,
                   tolerations=None,
                   backend=None,
                   mount_code_in_notebooks=False):
    ports = [self.request_notebook_port()]
    target_ports = [self.PORT]
    volumes, volume_mounts = get_pod_volumes(persistence_outputs=persistence_outputs,
                                             persistence_data=persistence_data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts
    if mount_code_in_notebooks:
        code_volume, code_volume_mount = self.get_notebook_code_volume()
        volumes.append(code_volume)
        volume_mounts.append(code_volume_mount)

    secret_refs = validate_secret_refs(secret_refs)
    configmap_refs = validate_configmap_refs(configmap_refs)

    resource_name = self.resource_manager.get_resource_name()
    args = self.get_notebook_args(deployment_name=resource_name,
                                  ports=ports,
                                  mount_code_in_notebooks=mount_code_in_notebooks,
                                  backend=backend)
    command = ["/bin/sh", "-c"]
    deployment = self.resource_manager.get_deployment(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=self.resource_manager.labels,
        env_vars=None,
        command=command,
        args=args,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=target_ports,
        init_context_mounts=context_mounts,
        restart_policy='Never')
    dep_resp, _ = self.create_or_update_deployment(name=resource_name, data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=resource_name,
                                   labels=self.resource_manager.get_labels(),
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=resource_name, data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        annotations = json.loads(conf.get('K8S_INGRESS_ANNOTATIONS'))
        paths = [{
            'path': '/notebooks/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': resource_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=resource_name,
                                        labels=self.resource_manager.get_labels(),
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=resource_name, data=ingress)
    return results
def test_get_pod_volumes_raises_for_wrong_values(self):
    with self.assertRaises(VolumeNotFoundError):
        get_pod_volumes(persistence_outputs='foo', persistence_data=None)
    with self.assertRaises(VolumeNotFoundError):
        get_pod_volumes(persistence_outputs=None, persistence_data='foo')
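# Sketch (not part of the source): the spawner methods above repeat the same
# volume-assembly sequence before building a pod or deployment. The helper below is a
# hypothetical consolidation of that pattern, shown only to summarize it; the helper
# name and return shape are assumptions, and it relies on the same module-level imports
# (get_pod_volumes, get_pod_refs_outputs_volumes, get_shm_volumes,
# get_auth_context_volumes) used by the functions above.
def _assemble_job_volumes(persistence_outputs,
                          persistence_data,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None):
    # Base persistence volumes (outputs and data stores that map to claims/host paths)
    volumes, volume_mounts = get_pod_volumes(persistence_outputs=persistence_outputs,
                                             persistence_data=persistence_data)
    # Read-only outputs of upstream jobs/experiments referenced by this run
    for outputs_refs in (outputs_refs_jobs, outputs_refs_experiments):
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts
    # Shared-memory volume
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts
    # Auth context volume, whose mounts are also passed to init containers above
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts
    return volumes, volume_mounts, context_mounts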