def _get_valid_config(cls, config, *fields):
    """Validate web hook configs and return only the compatible entries.

    Every entry must carry a valid `url`; its optional `method` (default
    POST) must be a string naming GET or POST. Any extra `fields` present
    on the entry are copied through to the result.
    """
    web_hooks = []
    for web_hook in to_list(config):
        url = web_hook.get('url')
        if not url:
            # Skip (but log) entries with no URL instead of failing hard.
            logger.warning(
                "Settings contains a non compatible web hook: `%s`", web_hook)
            continue
        if not validate_url(url):
            raise PolyaxonActionException(
                '{} received invalid URL `{}`.'.format(cls.name, url))
        method = web_hook.get('method', 'POST')
        if not isinstance(method, str):
            raise PolyaxonActionException(
                '{} received invalid method `{}`.'.format(cls.name, method))
        normalized_method = method.upper()
        if normalized_method not in ('GET', 'POST'):
            raise PolyaxonActionException(
                '{} received non compatible method `{}`.'.format(
                    cls.name, method))
        entry = {'url': url, 'method': normalized_method}
        entry.update({field: web_hook[field]
                      for field in fields if field in web_hook})
        web_hooks.append(entry)
    return web_hooks
def get_sidecar_container(job_name,
                          job_container_name,
                          sidecar_container_name,
                          sidecar_docker_image,
                          sidecar_docker_image_pull_policy,
                          namespace,
                          sidecar_config,
                          sidecar_args,
                          internal_health_check_url,
                          env_vars=None):
    """Return a pod sidecar container."""
    container_env = to_list(env_vars) if env_vars else []
    container_env += get_sidecar_env_vars(
        job_name=job_name,
        job_container_name=job_container_name,
        internal_health_check_url=internal_health_check_url)
    container_env += get_service_env_vars(namespace=namespace)
    # Surface every sidecar config entry as an environment variable.
    container_env.extend(get_env_var(name=key, value=value)
                         for key, value in sidecar_config.items())
    return client.V1Container(
        name=sidecar_container_name,
        image=sidecar_docker_image,
        image_pull_policy=sidecar_docker_image_pull_policy,
        command=get_sidecar_command(),
        env=container_env,
        args=sidecar_args)
def get_init_container(self, persistence_outputs):
    """Pod init container for setting outputs path."""
    is_clone = self.original_name is not None
    # Resumed runs reuse the original outputs; no init container needed.
    if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
        return []

    if is_clone and self.cloning_strategy == CloningStrategy.COPY:
        # Copy the original experiment's outputs into the new path.
        command = InitCommands.COPY
        original_outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.original_name)
    else:
        command = InitCommands.CREATE
        original_outputs_path = None

    outputs_path = get_experiment_outputs_path(
        persistence_outputs=persistence_outputs,
        experiment_name=self.experiment_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    init_args = get_output_args(
        command=command,
        outputs_path=outputs_path,
        original_outputs_path=original_outputs_path)
    container = client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        command=["/bin/sh", "-c"],
        args=to_list(init_args),
        volume_mounts=outputs_volume_mount)
    return [container]
def get_task_pod_spec(self,
                      volume_mounts,
                      volumes,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      env_vars=None,
                      command=None,
                      args=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      restart_policy='OnFailure'):
    """Pod spec to be used to create pods for tasks: master, worker, ps."""
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)
    # GPU resources may require extra device/driver volumes.
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    volume_mounts += gpu_mounts
    volumes += gpu_vols

    containers = [self.get_pod_container(
        volume_mounts=volume_mounts,
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        env_vars=env_vars,
        command=command,
        args=args,
        resources=resources)]
    if self.use_sidecar:
        containers.append(self.get_sidecar_container())

    # Fall back to cluster-wide job defaults when no scheduling hints given.
    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_JOBS)
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=settings.AFFINITY_JOBS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_JOBS)

    # Attach the service account only when RBAC is enabled.
    service_account_name = (settings.K8S_SERVICE_ACCOUNT_NAME
                            if settings.K8S_RBAC_ENABLED else None)
    return client.V1PodSpec(
        restart_policy=restart_policy,
        service_account_name=service_account_name,
        init_containers=to_list(self.get_init_container(persistence_outputs)),
        containers=containers,
        volumes=volumes,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations)
def run(cls, task_bind, *args, **kwargs):
    """Execute `cls._run`, retrying on the exceptions listed in `cls.retry_for`.

    `SoftTimeLimitExceeded` is always added to the retryable exceptions.

    Raises:
        The triggering exception when `task_bind.max_retries` is exhausted;
        otherwise re-raises the retry signal from `task_bind.retry`.
    """
    # Copy before appending: `to_list` may return `cls.retry_for` itself,
    # and appending to it would mutate the class attribute shared by all
    # calls/subclasses.
    retry_for = list(to_list(cls.retry_for or []))
    if SoftTimeLimitExceeded not in retry_for:
        retry_for.append(SoftTimeLimitExceeded)
    try:
        return cls._run(task_bind, *args, **kwargs)
    except tuple(retry_for) as exc:
        if task_bind.request.retries < task_bind.max_retries:
            raise task_bind.retry(countdown=task_bind.countdown)
        raise exc
def get_init_container(self, persistence_outputs):
    """Pod init container for setting outputs path."""
    outputs_path = get_job_outputs_path(
        persistence_outputs=persistence_outputs,
        job_name=self.job_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    # The init container only needs to create the outputs directory.
    init_args = get_output_args(command=InitCommands.CREATE,
                                outputs_path=outputs_path)
    return client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        command=["/bin/sh", "-c"],
        args=to_list(init_args),
        volume_mounts=outputs_volume_mount)
def publish_build_job_log(self, log_lines, job_uuid, job_name):
    """Send build-job log lines to the events queue and stream them."""
    log_lines = to_list(log_lines)
    self._logger.info("Publishing log event for task: %s", job_uuid)
    task_kwargs = {
        'job_uuid': job_uuid,
        'job_name': job_name,
        'log_lines': log_lines,
    }
    celery_app.send_task(EventsCeleryTasks.EVENTS_HANDLE_LOGS_BUILD_JOB,
                         kwargs=task_kwargs)
    # Also push the lines to the live log stream for this build.
    self._stream_job_log(job_uuid=job_uuid,
                         log_lines=log_lines,
                         routing_key=RoutingKeys.LOGS_SIDECARS_BUILDS)
def get_service(namespace,
                name,
                labels,
                ports,
                target_ports,
                service_type=None,
                external_i_ps=None):
    """Build a V1Service with one ServicePort per (port, target_port) pair.

    Args:
        namespace: Kubernetes namespace for the service.
        name: service name.
        labels: labels applied to metadata and used as the selector.
        ports: port or list of ports to expose.
        target_ports: target port or list of target ports (zipped with
            `ports`, so both must describe the same number of entries).
        service_type: optional Kubernetes service type.
        external_i_ps: optional external IP or list of external IPs.
    """
    external_i_ps = to_list(external_i_ps) if external_i_ps else None
    ports = to_list(ports)
    # Normalize target_ports as well: previously only `ports` was coerced,
    # so passing a scalar target port broke the zip below.
    target_ports = to_list(target_ports)
    metadata = client.V1ObjectMeta(name=name, labels=labels, namespace=namespace)
    service_ports = [
        client.V1ServicePort(port=port, target_port=target_port)
        for port, target_port in zip(ports, target_ports)
    ]
    spec = client.V1ServiceSpec(selector=labels,
                                type=service_type,
                                external_i_ps=external_i_ps,
                                ports=service_ports)
    return client.V1Service(api_version=k8s_constants.K8S_API_VERSION_V1,
                            kind=k8s_constants.K8S_SERVICE_KIND,
                            metadata=metadata,
                            spec=spec)
def __init__(self,
             namespace,
             name,
             project_name,
             project_uuid,
             job_name,
             job_uuid,
             job_docker_image,
             job_container_name=None,
             sidecar_container_name=None,
             sidecar_docker_image=None,
             init_container_name=None,
             init_docker_image=None,
             role_label=None,
             type_label=None,
             ports=None,
             use_sidecar=False,
             sidecar_config=None,
             log_level=None):
    """Initialize the job pod manager.

    Identity arguments (namespace, name, project/job names and uuids) are
    stored as-is; container names, docker images, and labels fall back to
    the global settings defaults when not provided.

    Raises:
        PolyaxonConfigurationError: if `use_sidecar` is True but no
            `sidecar_config` is given.
    """
    self.namespace = namespace
    self.name = name
    self.project_name = project_name
    self.project_uuid = project_uuid
    self.job_name = job_name
    self.job_uuid = job_uuid
    # Container/image settings default to the platform-wide configuration.
    self.job_container_name = job_container_name or settings.CONTAINER_NAME_JOB
    self.job_docker_image = job_docker_image
    self.sidecar_container_name = sidecar_container_name or settings.CONTAINER_NAME_SIDECAR
    self.sidecar_docker_image = sidecar_docker_image or settings.JOB_SIDECAR_DOCKER_IMAGE
    self.init_container_name = init_container_name or settings.CONTAINER_NAME_INIT
    self.init_docker_image = init_docker_image or settings.JOB_INIT_DOCKER_IMAGE
    self.role_label = role_label or settings.ROLE_LABELS_WORKER
    self.type_label = type_label or settings.TYPE_LABELS_RUNNER
    self.app_label = settings.APP_LABELS_JOB
    # Derived values; computed after labels/names above are set.
    self.labels = self.get_labels()
    self.k8s_job_name = self.get_k8s_job_name()
    self.ports = to_list(ports) if ports else []
    self.use_sidecar = use_sidecar
    # A sidecar cannot run without its configuration.
    if use_sidecar and not sidecar_config:
        raise PolyaxonConfigurationError(
            'In order to use a `sidecar_config` is required. '
            'The `sidecar_config` must correspond to the sidecar docker image used.')
    self.sidecar_config = sidecar_config
    self.log_level = log_level
def _search_algorithm_condition(queryset, params, negation):
    """Filter `queryset` by hptuning search-algorithm keys.

    Params are lowercased; the shorthand names `random` and `grid` map to
    their full key names. A single param uses `has_key`; several use
    `has_any_keys`. `negation` inverts the condition.
    """
    aliases = {'random': 'random_search', 'grid': 'grid_search'}
    normalized = [aliases.get(param.lower(), param.lower())
                  for param in to_list(params)]
    if len(normalized) == 1:
        query = Q(hptuning__has_key=normalized[0])
    else:
        query = Q(hptuning__has_any_keys=normalized)
    return queryset.filter(~query if negation else query)
def get_sidecar_container(job_name,
                          job_container_name,
                          sidecar_container_name,
                          sidecar_docker_image,
                          namespace,
                          app_label,
                          sidecar_config,
                          sidecar_args,
                          env_vars=None):
    """Return a pod sidecar container."""
    sidecar_env = to_list(env_vars) if env_vars else []
    sidecar_env += get_sidecar_env_vars(job_name=job_name,
                                        job_container_name=job_container_name)
    sidecar_env += get_service_env_vars(namespace=namespace)
    # Surface every sidecar config entry as an environment variable.
    sidecar_env.extend(get_env_var(name=key, value=value)
                       for key, value in sidecar_config.items())
    return client.V1Container(
        name=sidecar_container_name,
        image=sidecar_docker_image,
        command=get_sidecar_command(app_label=app_label),
        env=sidecar_env,
        args=sidecar_args)
def _eq_operator(name, params):
    """Build a Django `__contains` Q condition for `name` over `params`."""
    lookup = '{}__contains'.format(name)
    return Q(**{lookup: to_list(params)})
def get_list(values):
    """Coerce `values` to a list; `None` becomes an empty list."""
    if values is None:
        return []
    return to_list(values)
def get_task_pod_spec(self,
                      task_type,
                      task_idx,
                      volume_mounts,
                      volumes,
                      env_vars=None,
                      command=None,
                      args=None,
                      sidecar_args=None,
                      persistence_outputs=None,
                      persistence_data=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      secret_refs=None,
                      configmap_refs=None,
                      ephemeral_token=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      restart_policy='OnFailure'):
    """Pod spec to be used to create pods for tasks: master, worker, ps."""
    volume_mounts = get_list(volume_mounts)
    volumes = get_list(volumes)
    # GPU resources may require extra device/driver volumes.
    gpu_mounts, gpu_vols = get_gpu_volumes_def(resources)
    volume_mounts += gpu_mounts
    volumes += gpu_vols

    # Expose the task identity (type/index) to the container via env var.
    env_vars = get_list(env_vars)
    task_info = json.dumps({'type': task_type, 'index': task_idx})
    env_vars.append(
        client.V1EnvVar(name=constants.CONFIG_MAP_TASK_INFO_KEY_NAME,
                        value=task_info))

    containers = [self.get_pod_container(
        volume_mounts=volume_mounts,
        env_vars=env_vars,
        command=command,
        args=args,
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=ephemeral_token)]
    if self.use_sidecar:
        containers.append(self.get_sidecar_container(task_type=task_type,
                                                     task_idx=task_idx,
                                                     args=sidecar_args))

    # Fall back to cluster-wide experiment defaults for scheduling hints.
    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS)
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=settings.AFFINITY_EXPERIMENTS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_EXPERIMENTS)

    # Attach the service account only when RBAC is enabled.
    service_account_name = (settings.K8S_SERVICE_ACCOUNT_NAME
                            if settings.K8S_RBAC_ENABLED else None)
    return client.V1PodSpec(
        restart_policy=restart_policy,
        service_account_name=service_account_name,
        init_containers=to_list(self.get_init_container(persistence_outputs)),
        containers=containers,
        volumes=volumes,
        node_selector=node_selector,
        tolerations=tolerations,
        affinity=affinity)
def _lock_log(log_path, log_lines):
    """Append `log_lines` to `log_path` under an exclusive advisory lock.

    Args:
        log_path: path of the log file (opened in append mode).
        log_lines: a line or list of lines; joined with newlines and
            terminated by a trailing newline.
    """
    log_lines = to_list(log_lines)
    with open(log_path, "a") as log_file:
        fcntl.flock(log_file, fcntl.LOCK_EX)
        try:
            log_file.write('\n'.join(log_lines) + '\n')
        finally:
            # Release the lock even if the write raises; previously an
            # exception would skip LOCK_UN and rely on file close alone.
            fcntl.flock(log_file, fcntl.LOCK_UN)