示例#1
0
 def stop_notebook(self):
     """Tear down the notebook's deployment, service and, if used, ingress.

     Returns:
         True when every delete call completes without raising
         PolyaxonK8SError, False otherwise.
     """
     resource_name = JOB_NAME_FORMAT.format(
         name=NOTEBOOK_JOB_NAME,
         job_uuid=self.job_uuid,
     )
     try:
         # Only the deployment delete is invoked with reraise=True.
         self.delete_deployment(name=resource_name, reraise=True)
         self.delete_service(name=resource_name)
         if self._use_ingress():
             self.delete_ingress(name=resource_name)
     except PolyaxonK8SError:
         return False
     else:
         return True
 def stop_tensorboard(self):
     """Tear down the tensorboard's deployment, service and, if used, ingress.

     Returns:
         True when every delete call completes, False when a
         PolyaxonK8SError or ConfigException is raised along the way.
     """
     resource_name = JOB_NAME_FORMAT.format(
         name=TENSORBOARD_JOB_NAME,
         job_uuid=self.job_uuid,
     )
     try:
         # NOTE(review): unlike stop_notebook, delete_deployment is called
         # without reraise=True here -- confirm this difference is intended.
         self.delete_deployment(name=resource_name)
         self.delete_service(name=resource_name)
         if self._use_ingress():
             self.delete_ingress(name=resource_name)
     except (PolyaxonK8SError, ConfigException):
         return False
     else:
         return True
示例#3
0
def get_deployment_spec(
        namespace,
        app,
        name,
        project_name,
        project_uuid,
        job_name,
        job_uuid,
        volume_mounts,
        volumes,
        image,
        command,
        args,
        ports,
        env_vars=None,
        env_from=None,
        container_name=None,
        resources=None,
        node_selector=None,
        affinity=None,
        tolerations=None,
        role=None,
        type=None,  # pylint:disable=redefined-builtin
        replicas=1,
        service_account_name=None):
    """Build the deployment spec: a replica count plus a pod template.

    The pod template's metadata carries the labels produced by
    ``get_labels`` and a name rendered from ``JOB_NAME_FORMAT``; its spec
    comes from ``get_project_pod_spec``.

    Returns:
        A ``client.AppsV1beta1DeploymentSpec`` instance.
    """
    template_labels = get_labels(
        app=app,
        project_name=project_name,
        project_uuid=project_uuid,
        job_name=job_name,
        job_uuid=job_uuid,
        role=role,
        type=type,
    )
    resource_name = JOB_NAME_FORMAT.format(name=name, job_uuid=job_uuid)
    template_metadata = client.V1ObjectMeta(
        name=resource_name,
        labels=template_labels,
        namespace=namespace,
    )
    template_pod_spec = get_project_pod_spec(
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        container_name=container_name,
        command=command,
        args=args,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=ports,
        env_vars=env_vars,
        env_from=env_from,
        service_account_name=service_account_name,
    )
    return client.AppsV1beta1DeploymentSpec(
        replicas=replicas,
        template=client.V1PodTemplateSpec(
            metadata=template_metadata,
            spec=template_pod_spec,
        ),
    )
示例#4
0
def process_logs(build, temp=True):
    """Collect the dockerizer pod's logs for ``build`` and store them.

    The log lines are fetched through ``base.process_logs`` and handed to
    ``safe_log_job`` with ``append=False``.

    Args:
        build: object exposing ``uuid`` and ``unique_name``.
        temp: forwarded unchanged to ``safe_log_job``.
    """
    dockerizer_pod_id = JOB_NAME_FORMAT.format(
        name=DOCKERIZER_JOB_NAME,
        job_uuid=build.uuid.hex,
    )
    collected_lines = base.process_logs(
        k8s_manager=K8SManager(namespace=settings.K8S_NAMESPACE,
                               in_cluster=True),
        pod_id=dockerizer_pod_id,
        container_job_name=settings.CONTAINER_NAME_DOCKERIZER_JOB,
    )
    safe_log_job(
        job_name=build.unique_name,
        log_lines=collected_lines,
        temp=temp,
        append=False,
    )
示例#5
0
    def start_dockerizer(self,
                         resources=None,
                         node_selector=None,
                         affinity=None,
                         tolerations=None):
        """Create or update the dockerizer pod and return the response as a dict.

        The scheduling arguments are passed to the ``get_node_selector`` /
        ``get_affinity`` / ``get_tolerations`` helpers together with the
        matching ``*_BUILDS`` values read from ``conf``.

        Returns:
            ``to_dict()`` of the first element returned by
            ``create_or_update_pod``.
        """
        docker_volumes, docker_volume_mounts = get_docker_volumes()

        build_node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=conf.get('NODE_SELECTOR_BUILDS'),
        )
        build_affinity = get_affinity(
            affinity=affinity,
            default_affinity=conf.get('AFFINITY_BUILDS'),
        )
        build_tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=conf.get('TOLERATIONS_BUILDS'),
        )

        dockerizer_pod = pods.get_pod(
            namespace=self.namespace,
            app=conf.get('APP_LABELS_DOCKERIZER'),
            name=DOCKERIZER_JOB_NAME,
            project_name=self.project_name,
            project_uuid=self.project_uuid,
            job_name=self.job_name,
            job_uuid=self.job_uuid,
            volume_mounts=docker_volume_mounts,
            volumes=docker_volumes,
            image=conf.get('JOB_DOCKERIZER_IMAGE'),
            image_pull_policy=conf.get('JOB_DOCKERIZER_IMAGE_PULL_POLICY'),
            command=None,
            # The job UUID is the container's only argument.
            args=[self.job_uuid],
            ports=[],
            env_vars=self.get_env_vars(),
            container_name=conf.get('CONTAINER_NAME_DOCKERIZER_JOB'),
            resources=resources,
            node_selector=build_node_selector,
            affinity=build_affinity,
            tolerations=build_tolerations,
            role=conf.get('ROLE_LABELS_WORKER'),
            type=conf.get('TYPE_LABELS_RUNNER'),
            service_account_name=conf.get('K8S_SERVICE_ACCOUNT_BUILDS'),
            restart_policy='Never',
        )
        dockerizer_pod_name = JOB_NAME_FORMAT.format(
            job_uuid=self.job_uuid,
            name=DOCKERIZER_JOB_NAME,
        )

        response, _ = self.create_or_update_pod(name=dockerizer_pod_name,
                                                data=dockerizer_pod)
        return response.to_dict()
示例#6
0
文件: jobs.py 项目: posix4e/polyaxon
async def job_logs_v2(request, ws, username, project_name, job_id):
    """Validate the requested job, audit the view, then stream its logs.

    When ``validate_job`` yields no job, the accompanying message is sent
    over the websocket as an error and the handler returns immediately.
    """
    job, message = validate_job(
        request=request,
        username=username,
        project_name=project_name,
        job_id=job_id,
    )
    if job is None:
        await ws.send(get_error_message(message))
        return

    job_uuid = job.uuid.hex

    # Record who looked at the logs before streaming starts.
    viewer = request.app.user
    auditor.record(
        event_type=JOB_LOGS_VIEWED,
        instance=job,
        actor_id=viewer.id,
        actor_name=viewer.username,
    )

    # Stream logs
    await log_job(
        request=request,
        ws=ws,
        job=job,
        pod_id=JOB_NAME_FORMAT.format(name=JOB_NAME, job_uuid=job_uuid),
        container=settings.CONTAINER_NAME_JOB,
        namespace=settings.K8S_NAMESPACE,
    )
示例#7
0
 def pod_id(self) -> str:
     """Return JOB_NAME_FORMAT rendered with DOCKERIZER_JOB_NAME and this object's UUID hex."""
     uuid_hex = self.uuid.hex
     return JOB_NAME_FORMAT.format(name=DOCKERIZER_JOB_NAME, job_uuid=uuid_hex)
示例#8
0
    def start_notebook(self,
                       image,
                       persistence_outputs=None,
                       persistence_data=None,
                       outputs_refs_jobs=None,
                       outputs_refs_experiments=None,
                       resources=None,
                       secret_refs=None,
                       configmap_refs=None,
                       node_selector=None,
                       affinity=None,
                       tolerations=None,
                       allow_commits=False):
        """Create or update the notebook deployment, service and optional ingress.

        Volumes are gathered from the persistence settings, the referenced
        job/experiment outputs, the shm helper and the notebook code volume,
        then passed to the deployment together with env vars and the
        secret/configmap ``env_from`` entries.

        Returns:
            dict with keys ``'deployment'`` and ``'service'`` holding the
            ``to_dict()`` form of the respective create-or-update responses.
            The ingress response, when an ingress is created, is not
            included.
        """
        # `ports` is what the service (and ingress backend) exposes;
        # `target_ports` is what the deployment's container declares.
        ports = [self.request_notebook_port()]
        target_ports = [self.PORT]
        # Start from the persistence volumes and extend the same two lists
        # in place with every additional volume source below.
        volumes, volume_mounts = get_pod_volumes(
            persistence_outputs=persistence_outputs,
            persistence_data=persistence_data)
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs_jobs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs_experiments,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts
        shm_volumes, shm_volume_mounts = get_shm_volumes()
        volumes += shm_volumes
        volume_mounts += shm_volume_mounts
        env_vars = get_job_env_vars(
            persistence_outputs=persistence_outputs,
            outputs_path=get_notebook_job_outputs_path(
                persistence_outputs=persistence_outputs,
                notebook_job=self.job_name),
            persistence_data=persistence_data,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments)
        # The refs are run through the validators before being turned into
        # env_from entries.
        secret_refs = validate_secret_refs(secret_refs)
        configmap_refs = validate_configmap_refs(configmap_refs)
        env_from = get_pod_env_from(secret_refs=secret_refs,
                                    configmap_refs=configmap_refs)
        code_volume, code_volume_mount = self.get_notebook_code_volume()
        volumes.append(code_volume)
        volume_mounts.append(code_volume_mount)
        # One name is shared by the deployment, the service and the ingress.
        deployment_name = JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME,
                                                 job_uuid=self.job_uuid)

        # Scheduling arguments are combined with the *_EXPERIMENTS settings
        # by the helper functions.
        node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS)
        affinity = get_affinity(affinity=affinity,
                                default_affinity=settings.AFFINITY_EXPERIMENTS)
        tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_EXPERIMENTS)
        deployment = deployments.get_deployment(
            namespace=self.namespace,
            app=settings.APP_LABELS_NOTEBOOK,
            name=NOTEBOOK_JOB_NAME,
            project_name=self.project_name,
            project_uuid=self.project_uuid,
            job_name=self.job_name,
            job_uuid=self.job_uuid,
            volume_mounts=volume_mounts,
            volumes=volumes,
            image=image,
            command=["/bin/sh", "-c"],
            args=self.get_notebook_args(deployment_name=deployment_name,
                                        ports=ports,
                                        allow_commits=allow_commits),
            ports=target_ports,
            container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
            env_vars=env_vars,
            env_from=env_from,
            resources=resources,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            role=settings.ROLE_LABELS_DASHBOARD,
            type=settings.TYPE_LABELS_RUNNER,
            service_account_name=settings.K8S_SERVICE_ACCOUNT_EXPERIMENTS)
        # Labels built from the same inputs as the deployment's; they are
        # passed to the service and the ingress below.
        deployment_labels = deployments.get_labels(
            app=settings.APP_LABELS_NOTEBOOK,
            project_name=self.project_name,
            project_uuid=self.project_uuid,
            job_name=self.job_name,
            job_uuid=self.job_uuid,
            role=settings.ROLE_LABELS_DASHBOARD,
            type=settings.TYPE_LABELS_RUNNER)
        dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                       data=deployment)
        service = services.get_service(namespace=self.namespace,
                                       name=deployment_name,
                                       labels=deployment_labels,
                                       ports=ports,
                                       target_ports=target_ports,
                                       service_type=self._get_service_type())

        service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                        data=service)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if self._use_ingress():
            # The annotations setting is stored as a JSON string.
            annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
            # Dots in the project name become path separators in the route.
            paths = [{
                'path':
                '/notebook/{}'.format(self.project_name.replace('.', '/')),
                'backend': {
                    'serviceName': deployment_name,
                    'servicePort': ports[0]
                }
            }]
            ingress = ingresses.get_ingress(namespace=self.namespace,
                                            name=deployment_name,
                                            labels=deployment_labels,
                                            annotations=annotations,
                                            paths=paths)
            self.create_or_update_ingress(name=deployment_name, data=ingress)
        return results
示例#9
0
 def pod_id(self) -> str:
     """Return JOB_NAME_FORMAT rendered with TENSORBOARD_JOB_NAME and this object's UUID hex."""
     uuid_hex = self.uuid.hex
     return JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                   job_uuid=uuid_hex)
示例#10
0
 def get_resource_name(self):
     """Return JOB_NAME_FORMAT rendered with this object's name and job UUID."""
     format_fields = {'name': self.name, 'job_uuid': self.job_uuid}
     return JOB_NAME_FORMAT.format(**format_fields)
示例#11
0
 def pod_id(self):
     """Return JOB_NAME_FORMAT rendered with JOB_NAME and this object's UUID hex."""
     uuid_hex = self.uuid.hex
     return JOB_NAME_FORMAT.format(name=JOB_NAME, job_uuid=uuid_hex)
示例#12
0
def get_deployment(
        namespace,
        app,
        name,
        project_name,
        project_uuid,
        job_name,
        job_uuid,
        volume_mounts,
        volumes,
        image,
        command,
        args,
        ports,
        container_name,
        env_vars=None,
        env_from=None,
        resources=None,
        node_selector=None,
        affinity=None,
        tolerations=None,
        role=None,
        type=None,  # pylint:disable=redefined-builtin
        replicas=1,
        service_account_name=None):
    """Assemble the complete deployment object for a job.

    The deployment metadata carries the labels from ``get_labels`` and a
    name rendered from ``JOB_NAME_FORMAT``. The spec is produced by
    ``get_deployment_spec``, which receives the same arguments that were
    passed to this function.

    Returns:
        A ``client.AppsV1beta1Deployment`` instance.
    """
    deployment_labels = get_labels(
        app=app,
        project_name=project_name,
        project_uuid=project_uuid,
        job_name=job_name,
        job_uuid=job_uuid,
        role=role,
        type=type,
    )
    resource_name = JOB_NAME_FORMAT.format(name=name, job_uuid=job_uuid)
    deployment_metadata = client.V1ObjectMeta(
        name=resource_name,
        labels=deployment_labels,
        namespace=namespace,
    )
    deployment_spec = get_deployment_spec(
        namespace=namespace,
        app=app,
        name=name,
        project_name=project_name,
        project_uuid=project_uuid,
        job_name=job_name,
        job_uuid=job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=command,
        args=args,
        ports=ports,
        env_vars=env_vars,
        env_from=env_from,
        container_name=container_name,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=role,
        type=type,
        replicas=replicas,
        service_account_name=service_account_name,
    )
    return client.AppsV1beta1Deployment(
        api_version=k8s_constants.K8S_API_VERSION_EXTENSIONS_V1_BETA1,
        kind=k8s_constants.K8S_DEPLOYMENT_KIND,
        metadata=deployment_metadata,
        spec=deployment_spec,
    )
示例#13
0
 def pod_id(self) -> str:
     """Return JOB_NAME_FORMAT rendered with NOTEBOOK_JOB_NAME and this object's UUID hex."""
     uuid_hex = self.uuid.hex
     return JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME, job_uuid=uuid_hex)
示例#14
0
    def start_tensorboard(self,
                          image,
                          outputs_path,
                          persistence_outputs,
                          outputs_specs=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None):
        """Create or update the tensorboard deployment, service and optional ingress.

        Volumes are gathered from the outputs persistence, the referenced
        job/spec/experiment outputs and the stores secrets. The container
        runs a single shell command that chains the stores-secrets commands
        with the ``tensorboard`` invocation.

        Returns:
            dict with keys ``'deployment'`` and ``'service'`` holding the
            ``to_dict()`` form of the respective create-or-update responses.
            The ingress response, when an ingress is created, is not
            included.
        """
        # `ports` is what the service (and ingress backend) exposes;
        # `target_ports` is what the deployment's container declares.
        ports = [self.request_tensorboard_port()]
        target_ports = [self.PORT]
        # Start from the outputs volume and extend the same two lists in
        # place with every additional volume source below.
        volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs_jobs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_specs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs_experiments,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts

        # Add volumes for persistence outputs secrets
        stores_secrets = get_stores_secrets(specs=outputs_specs)
        self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
        secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
            stores_secrets=stores_secrets)
        volumes += secrets_volumes
        volume_mounts += secrets_volume_mounts

        # Get persistence outputs secrets auth commands
        command_args = self.get_stores_secrets_command_args(
            stores_secrets=stores_secrets)
        # The tensorboard invocation goes last; the list is later joined
        # with ' && ' into one shell command.
        command_args.append("tensorboard --logdir={} --port={}".format(
            outputs_path, self.PORT))

        # Scheduling arguments are combined with the *_TENSORBOARDS settings
        # by the helper functions.
        node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_TENSORBOARDS)
        affinity = get_affinity(
            affinity=affinity, default_affinity=settings.AFFINITY_TENSORBOARDS)
        tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_TENSORBOARDS)
        deployment = deployments.get_deployment(
            namespace=self.namespace,
            app=settings.APP_LABELS_TENSORBOARD,
            name=TENSORBOARD_JOB_NAME,
            project_name=self.project_name,
            project_uuid=self.project_uuid,
            job_name=self.job_name,
            job_uuid=self.job_uuid,
            volume_mounts=volume_mounts,
            volumes=volumes,
            image=image,
            command=["/bin/sh", "-c"],
            args=[' && '.join(command_args)],
            ports=target_ports,
            container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
            resources=resources,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            role=settings.ROLE_LABELS_DASHBOARD,
            type=settings.TYPE_LABELS_RUNNER)
        # One name is shared by the deployment, the service and the ingress.
        deployment_name = JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                                 job_uuid=self.job_uuid)
        # Labels built from the same inputs as the deployment's; they are
        # passed to the service and the ingress below.
        deployment_labels = deployments.get_labels(
            app=settings.APP_LABELS_TENSORBOARD,
            project_name=self.project_name,
            project_uuid=self.project_uuid,
            job_name=self.job_name,
            job_uuid=self.job_uuid,
            role=settings.ROLE_LABELS_DASHBOARD,
            type=settings.TYPE_LABELS_RUNNER)

        dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                       data=deployment)
        service = services.get_service(namespace=self.namespace,
                                       name=deployment_name,
                                       labels=deployment_labels,
                                       ports=ports,
                                       target_ports=target_ports,
                                       service_type=self._get_service_type())
        service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                        data=service)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if self._use_ingress():
            # The annotations setting is stored as a JSON string.
            annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
            # Dots in the project name become path separators in the route.
            paths = [{
                'path':
                '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
                'backend': {
                    'serviceName': deployment_name,
                    'servicePort': ports[0]
                }
            }]
            ingress = ingresses.get_ingress(namespace=self.namespace,
                                            name=deployment_name,
                                            labels=deployment_labels,
                                            annotations=annotations,
                                            paths=paths)
            self.create_or_update_ingress(name=deployment_name, data=ingress)

        return results
示例#15
0
 def get_k8s_job_name(self):
     """Return JOB_NAME_FORMAT rendered with this object's name and job UUID."""
     format_fields = {'name': self.name, 'job_uuid': self.job_uuid}
     return JOB_NAME_FORMAT.format(**format_fields)