示例#1
0
    def test_get_pod_outputs_volume_update_settings(self):
        """Each named outputs persistence resolves to the expected volumes."""
        # 'outputs1' should yield exactly one persistent-volume-claim volume.
        claim_volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs1')
        assert len(claim_volumes) == 1
        claim = claim_volumes[0].persistent_volume_claim
        assert claim.claim_name == 'test-claim-outputs-1'

        # 'outputs2' should yield exactly one host-path volume.
        host_volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs2')
        assert len(host_volumes) == 1
        host_path = host_volumes[0].host_path
        assert host_path.path == '/root/outputs'

        # 'outputs3' should yield no volume at all.
        no_volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs3')
        self.assertEqual(len(no_volumes), 0)
示例#2
0
 def get_init_container(self, init_command, init_args, env_vars,
                        context_mounts, persistence_outputs,
                        persistence_data):
     """Pod init container for setting outputs path.

     Args:
         init_command: Command for the container; falls back to
             ``["/bin/sh", "-c"]`` when falsy.
         init_args: Arguments for the command; when falsy they are built
             from ``get_output_args`` with ``InitCommands.CREATE`` and the
             job outputs path. The object passed by the caller is never
             modified.
         env_vars: Environment variables, passed through
             ``to_list(..., check_none=True)``.
         context_mounts: Extra volume mounts appended after the outputs
             volume mount.
         persistence_outputs: Outputs persistence used to resolve both the
             job outputs path and the outputs volume mount.
         persistence_data: Accepted but not used by this method.

     Returns:
         A ``client.V1Container`` whose ``args`` is a one-element list
         holding all init args joined with an empty separator.
     """
     env_vars = to_list(env_vars, check_none=True)
     outputs_path = stores.get_job_outputs_path(
         persistence=persistence_outputs, job_name=self.job_name)
     _, outputs_volume_mount = get_pod_outputs_volume(
         persistence_outputs=persistence_outputs)
     volume_mounts = outputs_volume_mount + to_list(context_mounts,
                                                    check_none=True)
     init_command = init_command or ["/bin/sh", "-c"]
     init_args = init_args or to_list(
         get_output_args(command=InitCommands.CREATE,
                         outputs_path=outputs_path))
     # Concatenate into a new list rather than using `+=`: an in-place
     # extend would append the auth context args to a list owned by the
     # caller, and they would pile up if that list were reused.
     init_args = init_args + to_list(
         get_auth_context_args(entity='job', entity_name=self.job_name))
     return client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         image_pull_policy=self.init_docker_image_pull_policy,
         command=init_command,
         args=[''.join(init_args)],
         env=env_vars,
         volume_mounts=volume_mounts)
示例#3
0
文件: pods.py 项目: innovia/polyaxon
    def get_init_container(self, persistence_outputs):
        """Build the init container(s) that prepare the experiment outputs path.

        Returns an empty list when the experiment is a clone using the RESUME
        strategy. Otherwise returns a one-element list holding a
        ``client.V1Container`` that runs the COPY command (clone using the
        COPY strategy) or the CREATE command (every other case).
        """
        is_clone = self.original_name is not None
        if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
            return []

        # Only a COPY clone needs the outputs path of the original experiment.
        copying = is_clone and self.cloning_strategy == CloningStrategy.COPY
        if copying:
            command = InitCommands.COPY
            source_outputs_path = get_experiment_outputs_path(
                persistence_outputs=persistence_outputs,
                experiment_name=self.original_name)
        else:
            command = InitCommands.CREATE
            source_outputs_path = None

        target_outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.experiment_name)
        _, mounts = get_pod_outputs_volume(persistence_outputs=persistence_outputs)

        init_container = client.V1Container(
            name=self.init_container_name,
            image=self.init_docker_image,
            command=["/bin/sh", "-c"],
            args=to_list(
                get_output_args(command=command,
                                outputs_path=target_outputs_path,
                                original_outputs_path=source_outputs_path)),
            volume_mounts=mounts)
        return [init_container]
示例#4
0
文件: pods.py 项目: jonike/polyaxon
 def get_init_container(self, persistence_outputs):
     """Build the init container that creates the job outputs path.

     The container runs ``/bin/sh -c`` with the CREATE output args for this
     job's outputs path and mounts the outputs volume resolved from
     ``persistence_outputs``.
     """
     job_outputs_path = get_job_outputs_path(
         persistence_outputs=persistence_outputs,
         job_name=self.job_name)
     _, mounts = get_pod_outputs_volume(
         persistence_outputs=persistence_outputs)
     return client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         command=["/bin/sh", "-c"],
         args=to_list(
             get_output_args(command=InitCommands.CREATE,
                             outputs_path=job_outputs_path)),
         volume_mounts=mounts)
示例#5
0
 def get_init_container(self, init_command, init_args, env_vars,
                        context_mounts, persistence_outputs,
                        persistence_data):
     """Build the init container that sets up the job outputs path.

     ``init_command`` falls back to ``["/bin/sh", "-c"]`` and ``init_args``
     falls back to the CREATE output args for the job outputs path when
     they are falsy. ``env_vars``, ``context_mounts`` and
     ``persistence_data`` are accepted but not used by this method.
     """
     job_outputs_path = stores.get_job_outputs_path(
         persistence=persistence_outputs,
         job_name=self.job_name)
     _, mounts = get_pod_outputs_volume(
         persistence_outputs=persistence_outputs)

     if not init_command:
         init_command = ["/bin/sh", "-c"]
     if not init_args:
         init_args = to_list(
             get_output_args(command=InitCommands.CREATE,
                             outputs_path=job_outputs_path))

     return client.V1Container(
         name=self.init_container_name,
         image=self.init_docker_image,
         image_pull_policy=self.init_docker_image_pull_policy,
         command=init_command,
         args=init_args,
         volume_mounts=mounts)
示例#6
0
    def get_init_container(self,
                           init_command,
                           init_args,
                           env_vars,
                           context_mounts,
                           persistence_outputs,
                           persistence_data):
        """Pod init container for setting outputs path.

        Args:
            init_command: Command for the container; falls back to
                ``["/bin/sh", "-c"]`` when falsy.
            init_args: Arguments for the command; when falsy they are built
                from ``get_output_args`` for the selected init command. The
                object passed by the caller is never modified.
            env_vars: Environment variables, passed through
                ``to_list(..., check_none=True)``.
            context_mounts: Extra volume mounts appended after the outputs
                volume mount.
            persistence_outputs: Outputs persistence used to resolve the
                experiment outputs paths and the outputs volume mount.
            persistence_data: Accepted but not used by this method.

        Returns:
            An empty list when the experiment is a clone using the RESUME
            strategy; otherwise a one-element list holding a
            ``client.V1Container`` whose ``args`` is a single string made of
            all init args joined with an empty separator.
        """
        env_vars = to_list(env_vars, check_none=True)
        if self.original_name is not None and self.cloning_strategy == CloningStrategy.RESUME:
            return []
        if self.original_name is not None and self.cloning_strategy == CloningStrategy.COPY:
            # COPY clones also need the outputs path of the original experiment.
            command = InitCommands.COPY
            original_outputs_path = stores.get_experiment_outputs_path(
                persistence=persistence_outputs,
                experiment_name=self.original_name)
        else:
            command = InitCommands.CREATE
            original_outputs_path = None

        outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.experiment_name)
        _, outputs_volume_mount = get_pod_outputs_volume(persistence_outputs=persistence_outputs)
        volume_mounts = outputs_volume_mount + to_list(context_mounts, check_none=True)
        init_command = init_command or ["/bin/sh", "-c"]
        init_args = init_args or to_list(
            get_output_args(command=command,
                            outputs_path=outputs_path,
                            original_outputs_path=original_outputs_path))
        # Concatenate into a new list rather than using `+=`: an in-place
        # extend would append the auth context args to a list owned by the
        # caller, and they would pile up if that list were reused.
        init_args = init_args + to_list(
            get_auth_context_args(entity='experiment',
                                  entity_name=self.experiment_name))
        return [
            client.V1Container(
                name=self.init_container_name,
                image=self.init_docker_image,
                image_pull_policy=self.init_docker_image_pull_policy,
                command=init_command,
                args=[''.join(init_args)],
                env=env_vars,
                resources=get_init_resources(),
                volume_mounts=volume_mounts)
        ]
示例#7
0
 def test_get_pod_outputs_volume_wrong_values(self):
     """An unknown outputs persistence name raises VolumeNotFoundError."""
     self.assertRaises(VolumeNotFoundError,
                       get_pod_outputs_volume,
                       persistence_outputs='foo')
示例#8
0
 def test_get_pod_outputs_volume(self):
     """Passing None yields one volume backed by the 'test-claim-outputs' claim."""
     default_volumes, _ = get_pod_outputs_volume(None)
     assert len(default_volumes) == 1
     claim = default_volumes[0].persistent_volume_claim
     assert claim.claim_name == 'test-claim-outputs'
示例#9
0
    def start_tensorboard(self,
                          image,
                          outputs_path,
                          persistence_outputs,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          node_selectors=None):
        """Create or update the tensorboard deployment, service and ingress.

        Volumes and mounts come from the outputs persistence plus the
        referenced job and experiment outputs. The container runs
        ``tensorboard`` against ``outputs_path`` on ``self.PORT``. An ingress
        is created or updated only when ``self._use_ingress()`` is truthy.

        Returns:
            A dict with ``'deployment'`` and ``'service'`` keys holding the
            ``to_dict()`` form of the corresponding responses.
        """
        ports = [self.request_tensorboard_port()]
        target_ports = [self.PORT]

        # Start from the outputs volume, then add the referenced outputs
        # (jobs first, experiments second).
        volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
        for outputs_refs in (outputs_refs_jobs, outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence_outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        # The deployment and its labels are described by the same identifiers.
        label_kwargs = {
            'app': settings.APP_LABELS_TENSORBOARD,
            'project_name': self.project_name,
            'project_uuid': self.project_uuid,
            'job_name': self.job_name,
            'job_uuid': self.job_uuid,
            'role': settings.ROLE_LABELS_DASHBOARD,
            'type': settings.TYPE_LABELS_EXPERIMENT,
        }
        tensorboard_cmd = "tensorboard --logdir={} --port={}".format(
            outputs_path, self.PORT)
        deployment = deployments.get_deployment(
            namespace=self.namespace,
            name=self.TENSORBOARD_JOB_NAME,
            volume_mounts=volume_mounts,
            volumes=volumes,
            image=image,
            command=["/bin/sh", "-c"],
            args=[tensorboard_cmd],
            ports=target_ports,
            container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
            resources=resources,
            node_selector=node_selectors,
            **label_kwargs)
        deployment_name = constants.JOB_NAME.format(
            name=self.TENSORBOARD_JOB_NAME, job_uuid=self.job_uuid)
        deployment_labels = deployments.get_labels(**label_kwargs)

        dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                       data=deployment)
        service = services.get_service(
            namespace=self.namespace,
            name=deployment_name,
            labels=deployment_labels,
            ports=ports,
            target_ports=target_ports,
            service_type=self._get_service_type())
        service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                        data=service)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if not self._use_ingress():
            return results

        # Route /tensorboard/<project name with dots as slashes> to the service.
        ingress_path = '/tensorboard/{}'.format(
            self.project_name.replace('.', '/'))
        backend = {'serviceName': deployment_name, 'servicePort': ports[0]}
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=deployment_name,
            labels=deployment_labels,
            annotations=json.loads(settings.K8S_INGRESS_ANNOTATIONS),
            paths=[{'path': ingress_path, 'backend': backend}])
        self.create_or_update_ingress(name=deployment_name, data=ingress)
        return results
示例#10
0
    def start_tensorboard(self,
                          outputs_path,
                          persistence_outputs,
                          outputs_specs=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None):
        """Create or update the tensorboard deployment, service and ingress.

        Volumes and mounts come from the outputs persistence, the referenced
        outputs (jobs, specs, experiments) and the stores secrets derived
        from ``outputs_specs``. The container shell command chains the stores
        secrets commands and the ``tensorboard`` invocation with ``&&``. An
        ingress is created or updated only when ``self._use_ingress()`` is
        truthy.

        Returns:
            A dict with ``'deployment'`` and ``'service'`` keys holding the
            ``to_dict()`` form of the corresponding responses.
        """
        ports = [self.request_tensorboard_port()]
        target_ports = [self.PORT]

        # Outputs volume first, then referenced outputs in a fixed order.
        volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
        for outputs_refs in (outputs_refs_jobs,
                             outputs_specs,
                             outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence_outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        # Volumes for the persistence outputs secrets.
        stores_secrets = get_stores_secrets(specs=outputs_specs)
        self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
        secret_volumes, secret_mounts = self.get_stores_secrets_volumes(
            stores_secrets=stores_secrets)
        volumes += secret_volumes
        volume_mounts += secret_mounts

        resource_name = self.resource_manager.get_resource_name()

        # Secrets auth commands run before tensorboard itself.
        shell_steps = self.get_stores_secrets_command_args(
            stores_secrets=stores_secrets)
        shell_steps.append("tensorboard --logdir={} --port={}".format(
            outputs_path, self.PORT))

        deployment = self.resource_manager.get_deployment(
            resource_name=resource_name,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=self.resource_manager.labels,
            env_vars=None,
            command=["/bin/sh", "-c"],
            args=[' && '.join(shell_steps)],
            persistence_outputs=persistence_outputs,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            resources=resources,
            ephemeral_token=None,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            ports=target_ports,
            restart_policy='Never')
        dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                       data=deployment)

        service = services.get_service(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            ports=ports,
            target_ports=target_ports,
            service_type=self._get_service_type())
        service_resp, _ = self.create_or_update_service(name=resource_name,
                                                        data=service)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if not self._use_ingress():
            return results

        # Route /tensorboards/<project name with dots as slashes> to the service.
        ingress_annotations = json.loads(conf.get('K8S_INGRESS_ANNOTATIONS'))
        ingress_path = '/tensorboards/{}'.format(
            self.project_name.replace('.', '/'))
        backend = {'serviceName': resource_name, 'servicePort': ports[0]}
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=ingress_annotations,
            paths=[{'path': ingress_path, 'backend': backend}])
        self.create_or_update_ingress(name=resource_name, data=ingress)
        return results
示例#11
0
    def start_tensorboard(self,
                          image,
                          outputs_path,
                          persistence_outputs,
                          outputs_specs=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None):
        """Create or update the tensorboard deployment, service and ingress.

        Volumes and mounts come from the outputs persistence, the referenced
        outputs (jobs, specs, experiments) and the stores secrets derived
        from ``outputs_specs``. ``node_selector``, ``affinity`` and
        ``tolerations`` are resolved against the tensorboard defaults from
        ``settings``. The container shell command chains the stores secrets
        commands and the ``tensorboard`` invocation with ``&&``. An ingress
        is created or updated only when ``self._use_ingress()`` is truthy.

        Returns:
            A dict with ``'deployment'`` and ``'service'`` keys holding the
            ``to_dict()`` form of the corresponding responses.
        """
        ports = [self.request_tensorboard_port()]
        target_ports = [self.PORT]

        # Outputs volume first, then referenced outputs in a fixed order.
        volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
        for outputs_refs in (outputs_refs_jobs,
                             outputs_specs,
                             outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence_outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        # Volumes for the persistence outputs secrets.
        stores_secrets = get_stores_secrets(specs=outputs_specs)
        self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
        secret_volumes, secret_mounts = self.get_stores_secrets_volumes(
            stores_secrets=stores_secrets)
        volumes += secret_volumes
        volume_mounts += secret_mounts

        # Secrets auth commands run before tensorboard itself.
        shell_steps = self.get_stores_secrets_command_args(
            stores_secrets=stores_secrets)
        shell_steps.append("tensorboard --logdir={} --port={}".format(
            outputs_path, self.PORT))

        # Resolve scheduling options against the tensorboard defaults.
        node_selector = get_node_selector(
            node_selector=node_selector,
            default_node_selector=settings.NODE_SELECTOR_TENSORBOARDS)
        affinity = get_affinity(
            affinity=affinity,
            default_affinity=settings.AFFINITY_TENSORBOARDS)
        tolerations = get_tolerations(
            tolerations=tolerations,
            default_tolerations=settings.TOLERATIONS_TENSORBOARDS)

        # The deployment and its labels are described by the same identifiers.
        label_kwargs = {
            'app': settings.APP_LABELS_TENSORBOARD,
            'project_name': self.project_name,
            'project_uuid': self.project_uuid,
            'job_name': self.job_name,
            'job_uuid': self.job_uuid,
            'role': settings.ROLE_LABELS_DASHBOARD,
            'type': settings.TYPE_LABELS_RUNNER,
        }
        deployment = deployments.get_deployment(
            namespace=self.namespace,
            name=TENSORBOARD_JOB_NAME,
            volume_mounts=volume_mounts,
            volumes=volumes,
            image=image,
            command=["/bin/sh", "-c"],
            args=[' && '.join(shell_steps)],
            ports=target_ports,
            container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
            resources=resources,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            **label_kwargs)
        deployment_name = JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                                 job_uuid=self.job_uuid)
        deployment_labels = deployments.get_labels(**label_kwargs)

        dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                       data=deployment)
        service = services.get_service(
            namespace=self.namespace,
            name=deployment_name,
            labels=deployment_labels,
            ports=ports,
            target_ports=target_ports,
            service_type=self._get_service_type())
        service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                        data=service)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if not self._use_ingress():
            return results

        # Route /tensorboard/<project name with dots as slashes> to the service.
        ingress_path = '/tensorboard/{}'.format(
            self.project_name.replace('.', '/'))
        backend = {'serviceName': deployment_name, 'servicePort': ports[0]}
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=deployment_name,
            labels=deployment_labels,
            annotations=json.loads(settings.K8S_INGRESS_ANNOTATIONS),
            paths=[{'path': ingress_path, 'backend': backend}])
        self.create_or_update_ingress(name=deployment_name, data=ingress)
        return results
示例#12
0
    def start_tensorboard(self,
                          outputs_path,
                          persistence_outputs,
                          outputs_specs=None,
                          outputs_refs_jobs=None,
                          outputs_refs_experiments=None,
                          resources=None,
                          labels=None,
                          annotations=None,
                          node_selector=None,
                          affinity=None,
                          tolerations=None,
                          max_restarts=None,
                          reconcile_url=None):
        """Create or update the tensorboard deployment, service and ingress.

        Volumes and mounts come from the outputs persistence, the referenced
        outputs (jobs, specs, experiments) and the stores secrets derived
        from ``outputs_specs``. The container shell command chains the stores
        secrets commands and a ``tensorboard`` invocation, which receives the
        proxy url as ``--path_prefix``, with ``&&``. ``annotations`` is
        forwarded to the deployment only; the ingress annotations are read
        from ``conf``. An ingress is created or updated only when
        ``self._use_ingress()`` is truthy.

        Returns:
            A dict with ``'deployment'`` and ``'service'`` keys holding the
            ``to_dict()`` form of the corresponding responses.
        """
        ports = [self.request_tensorboard_port()]
        target_ports = [self.port]

        # Outputs volume first, then referenced outputs in a fixed order.
        volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
        for outputs_refs in (outputs_refs_jobs,
                             outputs_specs,
                             outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence_outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        # Volumes for the persistence outputs secrets.
        stores_secrets = get_stores_secrets(specs=outputs_specs)
        self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
        secret_volumes, secret_mounts = self.get_stores_secrets_volumes(
            stores_secrets=stores_secrets)
        volumes += secret_volumes
        volume_mounts += secret_mounts

        resource_name = self.resource_manager.get_resource_name()
        tensorboard_url = self._get_proxy_url(namespace=self.namespace,
                                              job_name=TENSORBOARD_JOB_NAME,
                                              deployment_name=resource_name)

        # Secrets auth commands run before tensorboard itself.
        shell_steps = self.get_stores_secrets_command_args(
            stores_secrets=stores_secrets)
        tensorboard_cmd = (
            "tensorboard --logdir={log_dir} --port={port} "
            "--path_prefix={path_prefix}"
        ).format(log_dir=outputs_path,
                 port=self.port,
                 path_prefix=tensorboard_url)
        shell_steps.append(tensorboard_cmd)

        deployment_labels = get_labels(
            default_labels=self.resource_manager.labels,
            labels=labels)
        deployment = self.resource_manager.get_deployment(
            resource_name=resource_name,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=deployment_labels,
            env_vars=None,
            command=["/bin/sh", "-c"],
            args=[' && '.join(shell_steps)],
            persistence_outputs=persistence_outputs,
            outputs_refs_jobs=outputs_refs_jobs,
            outputs_refs_experiments=outputs_refs_experiments,
            resources=resources,
            annotations=annotations,
            ephemeral_token=None,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            ports=target_ports,
            reconcile_url=reconcile_url,
            max_restarts=max_restarts,
            restart_policy=get_deployment_restart_policy(max_restarts))
        dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                       body=deployment,
                                                       reraise=True)

        service = services.get_service(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            ports=ports,
            target_ports=target_ports,
            service_type=self._get_service_type())
        service_resp, _ = self.create_or_update_service(name=resource_name,
                                                        body=service,
                                                        reraise=True)
        results = {
            'deployment': dep_resp.to_dict(),
            'service': service_resp.to_dict()
        }

        if not self._use_ingress():
            return results

        # Route /tensorboards/<project name with dots as slashes> to the service.
        ingress_annotations = json.loads(conf.get(K8S_INGRESS_ANNOTATIONS))
        ingress_path = '/tensorboards/{}'.format(
            self.project_name.replace('.', '/'))
        backend = {'serviceName': resource_name, 'servicePort': ports[0]}
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=ingress_annotations,
            paths=[{'path': ingress_path, 'backend': backend}])
        self.create_or_update_ingress(name=resource_name,
                                      body=ingress,
                                      reraise=True)
        return results