Пример #1
0
    def authenticate_credentials(self, key):  # pylint:disable=arguments-differ
        """Authenticate a request from a base64-encoded ephemeral token key.

        Once decoded, `key` must split on `RedisEphemeralTokens.SEPARATOR`
        into exactly two parts: the token itself and the token uuid.

        Returns:
            A `(EphemeralUser(scope=...), None)` tuple, where the scope is the
            one read from the ephemeral token.

        Raises:
            exceptions.AuthenticationFailed: if the key cannot be decoded, does
                not contain exactly two parts, or the token check fails.
        """
        try:
            auth_parts = base64.b64decode(key).decode('utf-8').split(
                RedisEphemeralTokens.SEPARATOR)
        except (TypeError, ValueError, binascii.Error):
            # ValueError is caught on purpose: besides its subclasses
            # UnicodeDecodeError and (on Python 3) binascii.Error, b64decode
            # raises a plain ValueError for a str key with non-ASCII
            # characters, which must be reported as an authentication failure
            # rather than escaping as an unhandled error.
            msg = 'Invalid basic header. Credentials not correctly base64 encoded.'
            raise exceptions.AuthenticationFailed(msg)

        if len(auth_parts) != 2:
            msg = 'Invalid token header. Token should contain token and uuid.'
            raise exceptions.AuthenticationFailed(msg)

        token = auth_parts[0]
        token_uuid = auth_parts[1]

        ephemeral_token = RedisEphemeralTokens(token_uuid)
        # NOTE(review): this guard only triggers if RedisEphemeralTokens
        # defines its own truthiness -- confirm against the class.
        if not ephemeral_token:
            msg = 'Invalid token.'
            raise exceptions.AuthenticationFailed(msg)

        # Read the scope before checking the token: the check is expected to
        # consume the stored token state, after which the scope is gone.
        scope = ephemeral_token.scope
        if not ephemeral_token.check_token(token=token):
            ephemeral_token.clear()
            msg = 'Invalid token header'
            raise exceptions.AuthenticationFailed(msg)

        return EphemeralUser(scope=scope), None
Пример #2
0
    def run(cls):
        """Run the health check against every Redis connection.

        Returns:
            A dict mapping a check name to its result for each connection
            whose result is not healthy. When no connection is unhealthy, a
            single `'REDIS'` entry holding `Result()` is returned instead.
        """
        # (report key, Redis wrapper) pairs, checked in this order.
        checks = (
            ('REDIS_EPH_TOKENS', RedisEphemeralTokens),
            ('REDIS_SESSIONS', RedisSessions),
            ('REDIS_TTL', RedisTTL),
            ('REDIS_TO_STREAM', RedisToStream),
            ('REDIS_CONTAINERS', RedisJobContainers),
        )

        unhealthy = {}
        for check_name, redis_store in checks:
            outcome = cls.redis_health(redis_store.connection())
            if not outcome.is_healthy:
                unhealthy[check_name] = outcome

        if unhealthy:
            return unhealthy
        return {'REDIS': Result()}
Пример #3
0
    def post(self, request, *args, **kwargs):
        """Exchange a scoped request for the experiment owner's token.

        Responds with 403 when the requesting user has no scope, when the
        experiment is not scheduled/starting/running, or when the user's scope
        differs from the scope computed for this experiment. Otherwise
        responds with 200 and the key of the owner's `Token` (created if it
        does not exist yet).
        """
        requester = request.user
        if requester.scope is None:
            return Response(status=status.HTTP_403_FORBIDDEN)

        experiment = self.get_object()

        current_status = experiment.last_status
        allowed_statuses = (
            ExperimentLifeCycle.SCHEDULED,
            ExperimentLifeCycle.STARTING,
            ExperimentLifeCycle.RUNNING,
        )
        if current_status not in allowed_statuses:
            return Response(status=status.HTTP_403_FORBIDDEN)

        expected_scope = RedisEphemeralTokens.get_scope(
            user=experiment.user.id,
            model='experiment',
            object_id=experiment.id)
        # Compare order-insensitively: both scopes are sorted first.
        if sorted(requester.scope) != sorted(expected_scope):
            return Response(status=status.HTTP_403_FORBIDDEN)

        owner_token, _ = Token.objects.get_or_create(user=experiment.user)
        return Response({'token': owner_token.key}, status=status.HTTP_200_OK)
Пример #4
0
def start_experiment(experiment):
    """Schedule `experiment` and start it through the matching spawner.

    The experiment is first marked as SCHEDULED. If its specification has a
    build section, the image name and tag are resolved from the build job;
    when that fails the experiment is marked FAILED and the function returns.
    The spawner is then created and started; any exception raised while doing
    so is logged and recorded on the experiment as a FAILED status with a
    message and the formatted traceback.
    """
    # Record the scheduled state before any resource is requested.
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    # Leaving this as None forces the spawners to use the default docker image.
    job_docker_image = None
    if not experiment.specification.build:
        _logger.info('Start experiment with default image.')
    else:
        try:
            image_name, image_tag = get_image_info(
                build_job=experiment.build_job)
        except (ValueError, AttributeError):
            _logger.error('Could not start the experiment.', exc_info=True)
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='Image info was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`',
                     job_docker_image)

    spawner_class = get_spawner_class(experiment.specification.framework)
    token_scope = RedisEphemeralTokens.get_scope(experiment.user.id,
                                                 'experiment', experiment.id)

    # Filled in by whichever handler catches a failure below.
    failure_message = None
    failure_traceback = None
    try:
        # Use spawners to start the experiment
        spawner_kwargs = dict(
            project_name=project.unique_name,
            experiment_name=experiment.unique_name,
            experiment_group_name=group.unique_name if group else None,
            project_uuid=project.uuid.hex,
            experiment_group_uuid=group.uuid.hex if group else None,
            experiment_uuid=experiment.uuid.hex,
            persistence_config=experiment.persistence_config,
            outputs_refs_experiments=experiment.outputs_refs_experiments,
            outputs_refs_jobs=experiment.outputs_refs_jobs,
            original_name=experiment.original_unique_name,
            cloning_strategy=experiment.cloning_strategy,
            spec=experiment.specification,
            k8s_config=conf.get('K8S_CONFIG'),
            namespace=conf.get('K8S_NAMESPACE'),
            in_cluster=True,
            job_docker_image=job_docker_image,
            use_sidecar=True,
            token_scope=token_scope)
        spawner = spawner_class(**spawner_kwargs)
        response = spawner.start_experiment()
        handle_experiment(experiment=experiment,
                          spawner=spawner,
                          response=response)
    except ApiException:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the experiment, encountered a Kubernetes ApiException.')
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the experiment, please check your volume definitions.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = ('Could not start the experiment, '
                           'encountered a volume definition problem, %s.' % e)
    except Exception as e:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the experiment encountered an {} exception.'.
            format(e.__class__.__name__))

    # Only reached with a message when one of the handlers above ran to
    # completion; the failure is then recorded on the experiment.
    if failure_message is not None:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=failure_message,
                              traceback=failure_traceback)
Пример #5
0
    def _create_job(self,  # pylint:disable=arguments-differ
                    task_type,
                    command=None,
                    args=None,
                    env_vars=None,
                    resources=None,
                    node_selector=None,
                    affinity=None,
                    tolerations=None,
                    replicas=1,
                    restart_policy='Never'):
        """Build the replica definition for `task_type`.

        Generates an ephemeral header token when a token scope is set,
        collects the pod volumes and mounts, validates the secret and
        configmap refs of the spec, and asks the resource manager for the pod
        template spec.

        Returns:
            A dict with the keys `replicas`, `restartPolicy` and `template`.
        """
        # A token is only generated when a scope was configured.
        ephemeral_token = (
            RedisEphemeralTokens.generate_header_token(scope=self.token_scope)
            if self.token_scope else None)

        manager = self.resource_manager
        resource_name = manager.get_kf_resource_name(task_type=task_type)
        labels = manager.get_labels(task_type=task_type)

        # Set and validate volumes: persistence first, then the outputs
        # referenced from jobs and from experiments.
        persistence = self.persistence_config
        volumes, volume_mounts = get_pod_volumes(
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data)
        for outputs_refs in (self.outputs_refs_jobs,
                             self.outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence.outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        shm_volumes, shm_volume_mounts = get_shm_volumes()
        volumes += shm_volumes
        volume_mounts += shm_volume_mounts

        # The context mounts are also handed to the template separately below.
        context_volumes, context_mounts = get_auth_context_volumes()
        volumes += context_volumes
        volume_mounts += context_mounts

        # Validate secret and configmap refs
        secret_refs = validate_secret_refs(self.spec.secret_refs)
        configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

        template = manager.get_pod_template_spec(
            resource_name=resource_name,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=labels,
            env_vars=env_vars,
            command=command,
            args=args,
            ports=self.ports,
            init_env_vars=self.get_init_env_vars(),
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data,
            outputs_refs_jobs=self.outputs_refs_jobs,
            outputs_refs_experiments=self.outputs_refs_experiments,
            secret_refs=secret_refs,
            configmap_refs=configmap_refs,
            resources=resources,
            ephemeral_token=ephemeral_token,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            init_context_mounts=context_mounts,
            restart_policy=restart_policy)

        replica_spec = {}
        replica_spec['replicas'] = replicas
        replica_spec['restartPolicy'] = restart_policy
        replica_spec['template'] = template
        return replica_spec
Пример #6
0
    def _create_job(self,
                    task_type,
                    task_idx,
                    add_service,
                    command=None,
                    args=None,
                    env_vars=None,
                    resources=None,
                    annotations=None,
                    node_selector=None,
                    affinity=None,
                    tolerations=None,
                    max_restarts=None):
        """Create (or update) the pod for one task, plus an optional service.

        Generates an ephemeral header token when a token scope is set,
        collects the pod volumes and mounts, builds the task pod through the
        resource manager and submits it. When `add_service` is truthy a
        service with the same name, labels and ports is submitted as well.

        Note: `annotations` is accepted but not used by this method.

        Returns:
            A dict with the pod response under `'pod'` and, when a service
            was requested, the service response under `'service'`; both are
            stored as the result of `to_dict()`.
        """
        # A token is only generated when a scope was configured.
        ephemeral_token = (
            RedisEphemeralTokens.generate_header_token(scope=self.token_scope)
            if self.token_scope else None)

        resource_name = self.resource_manager.get_resource_name(
            task_type=task_type, task_idx=task_idx)
        job_uuid = self.get_job_uuids(task_type=task_type, task_idx=task_idx)
        reconcile_url = get_experiment_reconcile_url(self.experiment_name,
                                                     job_uuid)
        labels = self.get_labels(task_type=task_type,
                                 task_idx=task_idx,
                                 job_uuid=job_uuid)

        # Set and validate volumes: persistence first, then the outputs
        # referenced from jobs and from experiments.
        persistence = self.persistence_config
        volumes, volume_mounts = get_pod_volumes(
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data)
        for outputs_refs in (self.outputs_refs_jobs,
                             self.outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence.outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        shm_volumes, shm_volume_mounts = get_shm_volumes()
        volumes += shm_volumes
        volume_mounts += shm_volume_mounts

        # The context mounts are also handed to the pod separately below.
        context_volumes, context_mounts = get_auth_context_volumes()
        volumes += context_volumes
        volume_mounts += context_mounts

        pod = self.resource_manager.get_task_pod(
            task_type=task_type,
            task_idx=task_idx,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=labels,
            env_vars=env_vars,
            command=command,
            args=args,
            ports=self.ports,
            init_env_vars=self.get_init_env_vars(),
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data,
            outputs_refs_jobs=self.outputs_refs_jobs,
            outputs_refs_experiments=self.outputs_refs_experiments,
            secret_refs=self.spec.secret_refs,
            config_map_refs=self.spec.config_map_refs,
            resources=resources,
            ephemeral_token=ephemeral_token,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            init_context_mounts=context_mounts,
            reconcile_url=reconcile_url,
            max_restarts=max_restarts,
            restart_policy=get_pod_restart_policy(max_restarts))
        pod_resp, _ = self.create_or_update_pod(name=resource_name,
                                                body=pod,
                                                reraise=True)
        results = {'pod': pod_resp.to_dict()}
        if not add_service:
            return results

        service = services.get_service(namespace=self.namespace,
                                       name=resource_name,
                                       labels=labels,
                                       ports=self.ports,
                                       target_ports=self.ports)
        service_resp, _ = self.create_or_update_service(name=resource_name,
                                                        body=service,
                                                        reraise=True)
        results['service'] = service_resp.to_dict()
        return results
    def _create_job(self,
                    task_type,
                    task_idx,
                    add_service,
                    command=None,
                    args=None,
                    env_vars=None,
                    resources=None,
                    node_selector=None,
                    affinity=None,
                    tolerations=None,
                    restart_policy='Never'):
        """Create (or update) the pod for one task, plus an optional service.

        Generates an ephemeral header token for the configured token scope,
        collects the pod volumes and mounts, validates the secret and
        configmap refs of the spec, builds the pod through the pod manager
        and submits it. When `add_service` is truthy a service with the same
        name and labels, exposing the pod manager's ports, is submitted too.

        Returns:
            A dict with the pod response under `'pod'` and, when a service
            was requested, the service response under `'service'`; both are
            stored as the result of `to_dict()`.
        """
        # The token is generated unconditionally in this variant.
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)

        manager = self.pod_manager
        job_name = manager.get_job_name(task_type=task_type,
                                        task_idx=task_idx)
        sidecar_args = get_sidecar_args(pod_id=job_name)
        labels = manager.get_labels(task_type=task_type, task_idx=task_idx)

        # Set and validate volumes: persistence first, then the outputs
        # referenced from jobs and from experiments.
        persistence = self.persistence_config
        volumes, volume_mounts = get_pod_volumes(
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data)
        for outputs_refs in (self.outputs_refs_jobs,
                             self.outputs_refs_experiments):
            extra_volumes, extra_mounts = get_pod_refs_outputs_volumes(
                outputs_refs=outputs_refs,
                persistence_outputs=persistence.outputs)
            volumes += extra_volumes
            volume_mounts += extra_mounts

        shm_volumes, shm_volume_mounts = get_shm_volumes()
        volumes += shm_volumes
        volume_mounts += shm_volume_mounts

        # Validate secret and configmap refs
        secret_refs = validate_secret_refs(self.spec.secret_refs)
        configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

        pod = manager.get_pod(
            task_type=task_type,
            task_idx=task_idx,
            volume_mounts=volume_mounts,
            volumes=volumes,
            labels=labels,
            env_vars=env_vars,
            command=command,
            args=args,
            sidecar_args=sidecar_args,
            persistence_outputs=persistence.outputs,
            persistence_data=persistence.data,
            outputs_refs_jobs=self.outputs_refs_jobs,
            outputs_refs_experiments=self.outputs_refs_experiments,
            secret_refs=secret_refs,
            configmap_refs=configmap_refs,
            resources=resources,
            ephemeral_token=ephemeral_token,
            node_selector=node_selector,
            affinity=affinity,
            tolerations=tolerations,
            restart_policy=restart_policy)
        pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
        results = {'pod': pod_resp.to_dict()}
        if not add_service:
            return results

        service = services.get_service(namespace=self.namespace,
                                       name=job_name,
                                       labels=labels,
                                       ports=self.pod_manager.ports,
                                       target_ports=self.pod_manager.ports)
        service_resp, _ = self.create_or_update_service(name=job_name,
                                                        data=service)
        results['service'] = service_resp.to_dict()
        return results
    def test_objects(self):
        """Exercise an ephemeral token in its blank, generated and checked states."""
        # A freshly constructed token has a key but no stored state.
        blank = RedisEphemeralTokens()
        assert blank.key is not None
        assert blank.redis_key == RedisEphemeralTokens.KEY_EPHEMERAL_TOKENS.format(
            blank.key)

        assert blank.get_state() is None
        assert blank.salt is None
        assert blank.ttl is None
        assert blank.scope is None

        # A generated token carries state, salt, ttl and scope.
        generated = RedisEphemeralTokens.generate(
            scope=RedisEphemeralTokens.get_scope(1, 'experiment', 1))

        assert generated.get_state() is not None
        assert generated.salt is not None
        assert generated.ttl == conf.get(TTL_EPHEMERAL_TOKEN)
        assert generated.scope == RedisEphemeralTokens.get_scope(
            1, 'experiment', 1)
        assert generated.check_token('foo') is False
        # A failed check deletes the token state.
        assert generated.get_state() is None

        # Checking against None is rejected and deletes the state as well.
        rejected = RedisEphemeralTokens.generate(
            scope=RedisEphemeralTokens.get_scope(1, 'experiment', 1))
        assert rejected.check_token(None) is False
        assert rejected.get_state() is None

        # A valid token passes the check.
        accepted = RedisEphemeralTokens.generate(
            scope=RedisEphemeralTokens.get_scope(1, 'experiment', 1))
        valid = RedisEphemeralTokens.make_token(accepted)
        assert accepted.check_token(valid) is True

        # A successful check deletes the token state too.
        assert accepted.get_state() is None
        assert accepted.salt is None
        assert accepted.ttl is None
        assert accepted.scope is None