예제 #1
0
파일: job_data.py 프로젝트: ctc-oss/scale
    def get_injected_env_vars(self, input_files_dict):
        """Apply all execution time values to job data

        :param input_files_dict: Mapping of input names to lists of InputFiles
        :type input_files_dict: {str, [:class:`job.execution.configuration.input_file.InputFile`]}
        :return: Mapping of all input keys to their true file / property values
        :rtype: {str, str}
        """

        env_vars = {}
        for data_input in self.get_dict()['input_data']:
            name = data_input['name']
            env_name = normalize_env_var_name(name)
            if 'value' in data_input:
                # Property input: inject the raw value
                env_vars[env_name] = data_input['value']
            if 'file_id' in data_input:
                # Single-file input: point at the mounted file within the input directory,
                # preferring the local file name when one was assigned
                input_file = input_files_dict[name][0]
                file_name = input_file.local_file_name or os.path.basename(input_file.workspace_path)
                env_vars[env_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, name, file_name)
            elif 'file_ids' in data_input:
                # Multi-file input: point at the entire input directory
                env_vars[env_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, name)
        return env_vars
예제 #2
0
    def _check_for_name_collisions(self):
        """Ensures all names that map to environment variables are unique, and throws a
        :class:`job.seed.exceptions.InvalidSeedManifestDefinition` if they are not unique.

        Per Seed specification for implementors we must validate that all reserved keywords, settings
        and inputs are unique as they are ultimately injected as environment variables.
        """

        # Include reserved keywords
        env_vars = ["OUTPUT_DIR"]

        env_vars += [
            normalize_env_var_name(setting['name'])
            for setting in self.get_settings()
        ]
        env_vars += [
            normalize_env_var_name(input_file['name'])
            for input_file in self.get_input_files()
        ]
        # Renamed loop variable so it no longer shadows the stdlib `json` module name
        env_vars += [
            normalize_env_var_name(json_input['name'])
            for json_input in self.get_input_json()
        ]
        env_vars += [
            normalize_env_var_name('ALLOCATED_' + resource['name'])
            for resource in self.get_scalar_resources()
        ]

        # A duplicate anywhere in the combined list means two names would map to
        # the same environment variable
        if len(env_vars) != len(set(env_vars)):
            raise InvalidSeedManifestDefinition(
                'NAME_COLLISION_ERROR',
                'Collisions are not allowed between reserved keywords, resources, settings '
                'and input names.')
예제 #3
0
    def get_injected_env_vars(self, input_files):
        """Inject all execution time values to job data mappings

        :param input_files: Mapping of input names to InputFiles
        :type input_files: {str, :class:`job.execution.configuration.input_file.InputFile`}
        :return: Mapping of all input keys to their true file / property values
        :rtype: {str, str}
        """
        env_vars = {}
        for file_input in self._new_data.values.values():
            if isinstance(file_input, FileValue):
                env_var_name = normalize_env_var_name(file_input.name)
                if len(file_input.file_ids) > 1:
                    # When we have input for multiple files, map in the entire directory
                    env_vars[env_var_name] = os.path.join(
                        SCALE_JOB_EXE_INPUT_PATH, file_input.name)
                else:
                    input_file = input_files[file_input.name][0]
                    file_name = os.path.basename(input_file.workspace_path)
                    if input_file.local_file_name:
                        file_name = input_file.local_file_name
                    env_vars[env_var_name] = os.path.join(
                        SCALE_JOB_EXE_INPUT_PATH, file_input.name, file_name)
        for json_input in self._new_data.values.values():
            # BUG FIX: previously tested isinstance(file_input, JsonValue) — the leftover
            # variable from the loop above — so JSON inputs were injected (or skipped)
            # based on the wrong object's type. Test the current loop variable instead.
            if isinstance(json_input, JsonValue):
                env_vars[normalize_env_var_name(
                    json_input.name)] = json_input.value

        return env_vars
예제 #4
0
    def get_injected_env_vars(self, input_files, interface):
        """Inject all execution time values to job data mappings

        :param input_files: Mapping of input names to InputFiles
        :type input_files: {str, :class:`job.execution.configuration.input_file.InputFile`}
        :param interface: The interface to which this data is being passed
        :type interface: :class:`data.interface.interface.Interface`
        :return: Mapping of all input keys to their true file / property values
        :rtype: {str, str}
        """
        env_vars = {}

        # First pass: file inputs
        for input_value in self._new_data.values.values():
            if not isinstance(input_value, FileValue):
                continue
            name = input_value.name
            env_var_name = normalize_env_var_name(name)
            if name not in interface.parameters:
                logger.warning("File input %s not specified in interface %s" % (name, interface))
            if name in interface.parameters and interface.parameters[name].multiple:
                # When we have input for multiple files, map in the entire directory
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, name)
            else:
                # Single file: prefer the locally-assigned name over the stored file name
                first_file = input_files[name][0]
                file_name = first_file.local_file_name or first_file.file_name
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, name, file_name)

        # Second pass: JSON inputs, injected as their string representation
        for input_value in self._new_data.values.values():
            if isinstance(input_value, JsonValue):
                env_vars[normalize_env_var_name(input_value.name)] = str(input_value.value)

        return env_vars
예제 #5
0
    def _configure_secrets(self, config, job_exe, job_type, interface):
        """Creates a copy of the configuration, configures secrets (masked in one of the copies), and applies any final
        configuration

        :param config: The execution configuration, where the secrets will be masked out
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param interface: The job interface
        :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
        :returns: The copy of the execution configuration that contains the secrets
        :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        """

        # Copy the configuration
        config_with_secrets = config.create_copy()

        # Configure settings values, some are secret.
        # `config` (the caller's object) receives the hidden/masked variants; the copy
        # returned from this method receives the real values.
        if job_type.is_system:
            config.add_to_task('main', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('main', settings=self._system_settings)
        else:
            config.add_to_task('pre', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('pre', settings=self._system_settings)
            config.add_to_task('post', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('post', settings=self._system_settings)
            job_config = job_type.get_job_configuration()
            secret_settings = secrets_mgr.retrieve_job_type_secrets(job_type.get_secrets_key())
            # Build the main-task settings twice: once masked (for `config`) and once
            # with real secret values (for `config_with_secrets`)
            for _config, secrets_hidden in [(config, True), (config_with_secrets, False)]:
                task_settings = {}
                for setting in interface.get_settings():
                    name = setting['name']
                    if setting['secret']:
                        # Secret values come from the secrets manager; missing secrets
                        # leave value as None
                        value = None
                        if name in secret_settings:
                            value = secret_settings[name]
                            if value is not None and secrets_hidden:
                                value = '*****'
                    else:
                        value = job_config.get_setting_value(name)
                    # Precedence note: this parses as
                    # ('required' in setting and setting['required']) or (value is not None),
                    # i.e. required settings are always included (even when None)
                    if 'required' in setting and setting['required'] or value is not None:
                        task_settings[name] = value
                # TODO: command args and env var replacement from the interface should be removed once Scale drops
                # support for old-style job types
                # NOTE(review): args is read from `config` (not `_config`) on both loop
                # iterations; for old-style job types the second iteration therefore reads
                # the args already rewritten by the first — confirm this is intended
                args = config._get_task_dict('main')['args']
                if JobInterfaceSunset.is_seed_dict(interface.definition):
                    env_vars = task_settings
                # TODO: Remove this else block when old-style job types are removed
                else:
                    args = JobInterface.replace_command_parameters(args, task_settings)
                    env_vars = interface.populate_env_vars_arguments(task_settings)
                _config.add_to_task('main', args=args, env_vars=env_vars, settings=task_settings)

        # Configure env vars for settings: every non-None setting on every task becomes
        # an environment variable with a normalized name
        for _config in [config, config_with_secrets]:
            for task_type in _config.get_task_types():
                env_vars = {}
                for name, value in _config.get_settings(task_type).items():
                    if value is not None:
                        env_name = normalize_env_var_name(name)
                        env_vars[env_name] = value
                _config.add_to_task(task_type, env_vars=env_vars)

        # Configure Docker parameters for env vars and Docker volumes
        for _config in [config, config_with_secrets]:
            # Track volumes already emitted so later tasks reference (not re-create) them
            existing_volumes = set()
            for task_type in _config.get_task_types():
                docker_params = []
                for name, value in _config.get_env_vars(task_type).items():
                    docker_params.append(DockerParameter('env', '%s=%s' % (name, value)))
                for name, volume in _config.get_volumes(task_type).items():
                    docker_params.append(volume.to_docker_param(is_created=(name in existing_volumes)))
                    existing_volumes.add(name)
                _config.add_to_task(task_type, docker_params=docker_params)

        # TODO: this feature should be removed once Scale drops support for job type docker params
        # Configure docker parameters listed in job type
        if job_type.docker_params:
            docker_params = []
            for key, value in job_type.docker_params.items():
                docker_params.append(DockerParameter(key, value))
            if docker_params:
                config.add_to_task('main', docker_params=docker_params)
                config_with_secrets.add_to_task('main', docker_params=docker_params)

        return config_with_secrets
예제 #6
0
    def _configure_all_tasks(self, config, job_exe, job_type):
        """Configures the given execution with items that apply to all tasks

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.set_task_ids(job_exe.get_cluster_id())

        for task_type in config.get_task_types():
            # Configure env vars describing allocated task resources
            env_vars = {}
            for resource in config.get_resources(task_type).resources:
                env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
                env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources

            # Configure env vars for Scale meta-data
            # (`unicode` — this module targets Python 2)
            env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
            env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
            if job_exe.recipe_id:
                env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
            if job_exe.batch_id:
                env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

            # Configure workspace volumes
            workspace_volumes = {}
            for task_workspace in config.get_workspaces(task_type):
                logger.debug(self._workspaces)
                workspace_model = self._workspaces[task_workspace.name]
                # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
                if workspace_model.volume:
                    vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                    cont_path = get_workspace_volume_path(workspace_model.name)
                    if workspace_model.volume.host:
                        # Host-mounted workspace: bind the remote path directly
                        host_path = workspace_model.volume.remote_path
                        volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                    else:
                        driver = workspace_model.volume.driver
                        driver_opts = {}
                        # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                        if driver == 'nfs':
                            driver_opts = {'share': workspace_model.volume.remote_path}
                        volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                        driver_opts=driver_opts)
                    workspace_volumes[task_workspace.name] = volume

            config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

        # Labels for metric grouping
        job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
        job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
        job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
        job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
        main_label = DockerParameter('label', 'scale-task-type=main')
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label])
        if not job_type.is_system:
            # Non-system jobs also run pre/post tasks; label them for metrics too
            pre_label = DockerParameter('label', 'scale-task-type=pre')
            post_label = DockerParameter('label', 'scale-task-type=post')
            config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                     job_execution_id_label, pre_label])
            config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

        # Configure tasks for logging
        if settings.LOGGING_ADDRESS is not None:
            log_driver = DockerParameter('log-driver', 'syslog')
            # Must explicitly specify RFC3164 to ensure compatibility with logstash in Docker 1.11+
            syslog_format = DockerParameter('log-opt', 'syslog-format=rfc3164')
            log_address = DockerParameter('log-opt', 'syslog-address=%s' % settings.LOGGING_ADDRESS)
            if not job_type.is_system:
                pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('pre'), job_type.name))
                config.add_to_task('pre', docker_params=[log_driver, syslog_format, log_address, pre_task_tag])
                post_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('post'), job_type.name))
                config.add_to_task('post', docker_params=[log_driver, syslog_format, log_address, post_task_tag])
                # TODO: remove es_urls parameter when Scale no longer supports old style job types
                es_urls = None
                # Use connection pool to get up-to-date list of elasticsearch nodes
                if settings.ELASTICSEARCH:
                    hosts = [host.host for host in settings.ELASTICSEARCH.transport.connection_pool.connections]
                    es_urls = ','.join(hosts)
                # Post task needs ElasticSearch URL to grab logs for old artifact registration
                es_param = DockerParameter('env', 'SCALE_ELASTICSEARCH_URLS=%s' % es_urls)
                config.add_to_task('post', docker_params=[es_param])
            main_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('main'), job_type.name))
            config.add_to_task('main', docker_params=[log_driver, syslog_format, log_address, main_task_tag])
예제 #7
0
    def _configure_all_tasks(self, config, job_exe, job_type):
        """Configures the given execution with items that apply to all tasks

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.set_task_ids(job_exe.get_cluster_id())

        for task_type in config.get_task_types():
            # Configure env vars describing allocated task resources
            env_vars = {}
            nvidia_docker_label = None

            for resource in config.get_resources(task_type).resources:
                env_name = 'ALLOCATED_%s' % normalize_env_var_name(
                    resource.name)
                env_vars[
                    env_name] = '%.1f' % resource.value  # Assumes scalar resources
                # GPU allocation: expose the assigned devices via NVIDIA's env var
                if resource.name == "gpus" and int(resource.value) > 0:
                    gpu_list = GPUManager.get_nvidia_docker_label(
                        job_exe.node_id, job_exe.job_id)
                    nvidia_docker_label = DockerParameter(
                        'env', 'NVIDIA_VISIBLE_DEVICES={}'.format(
                            gpu_list.strip(',')))

            # Configure env vars for Scale meta-data
            # (`unicode` — this module targets Python 2)
            env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
            env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
            if job_exe.recipe_id:
                env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
            if job_exe.batch_id:
                env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

            # Configure workspace volumes
            workspace_volumes = {}
            for task_workspace in config.get_workspaces(task_type):
                logger.debug(self._workspaces)
                workspace_model = self._workspaces[task_workspace.name]
                # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
                if workspace_model.volume:
                    vol_name = get_workspace_volume_name(
                        job_exe, task_workspace.name)
                    cont_path = get_workspace_volume_path(workspace_model.name)
                    if workspace_model.volume.host:
                        # Host-mounted workspace: bind the remote path directly
                        host_path = workspace_model.volume.remote_path
                        volume = Volume(vol_name,
                                        cont_path,
                                        task_workspace.mode,
                                        is_host=True,
                                        host_path=host_path)
                    else:
                        driver = workspace_model.volume.driver
                        driver_opts = {}
                        # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                        if driver == 'nfs':
                            driver_opts = {
                                'share': workspace_model.volume.remote_path
                            }
                        volume = Volume(vol_name,
                                        cont_path,
                                        task_workspace.mode,
                                        is_host=False,
                                        driver=driver,
                                        driver_opts=driver_opts)
                    workspace_volumes[task_workspace.name] = volume

            config.add_to_task(task_type,
                               env_vars=env_vars,
                               wksp_volumes=workspace_volumes)

        # Labels for metric grouping
        job_id_label = DockerParameter(
            'label', 'scale-job-id={}'.format(job_exe.job_id))
        job_execution_id_label = DockerParameter(
            'label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
        job_type_name_label = DockerParameter(
            'label', 'scale-job-type-name={}'.format(job_type.name))
        job_type_version_label = DockerParameter(
            'label', 'scale-job-type-version={}'.format(job_type.version))
        main_label = DockerParameter('label', 'scale-task-type=main')
        # NOTE(review): nvidia_docker_label is reset to None at the top of each
        # task-type iteration above, so this check only reflects the LAST task type's
        # GPU allocation — confirm whether GPUs are guaranteed to be on the task type
        # iterated last, otherwise the nvidia runtime may be skipped incorrectly
        if nvidia_docker_label:
            nvidia_runtime_param = DockerParameter('runtime', 'nvidia')
            config.add_to_task('main',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, main_label,
                                   nvidia_docker_label, nvidia_runtime_param
                               ])
        else:
            config.add_to_task('main',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, main_label
                               ])

        if not job_type.is_system:
            # Non-system jobs also run pre/post tasks; label them for metrics too
            pre_label = DockerParameter('label', 'scale-task-type=pre')
            post_label = DockerParameter('label', 'scale-task-type=post')
            config.add_to_task('pre',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, pre_label
                               ])
            config.add_to_task('post',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, post_label
                               ])

        # Configure tasks for logging
        if settings.LOGGING_ADDRESS is not None:
            log_driver = DockerParameter('log-driver', 'fluentd')
            fluent_precision = DockerParameter(
                'log-opt', 'fluentd-sub-second-precision=true')
            log_address = DockerParameter(
                'log-opt', 'fluentd-address=%s' % settings.LOGGING_ADDRESS)
            if not job_type.is_system:
                pre_task_tag = DockerParameter(
                    'log-opt', 'tag=%s|%s|%s|%s|%s' %
                    (config.get_task_id('pre'), job_type.name,
                     job_type.version, job_exe.job_id, job_exe.exe_num))
                config.add_to_task('pre',
                                   docker_params=[
                                       log_driver, fluent_precision,
                                       log_address, pre_task_tag
                                   ])
                post_task_tag = DockerParameter(
                    'log-opt', 'tag=%s|%s|%s|%s|%s' %
                    (config.get_task_id('post'), job_type.name,
                     job_type.version, job_exe.job_id, job_exe.exe_num))
                config.add_to_task('post',
                                   docker_params=[
                                       log_driver, fluent_precision,
                                       log_address, post_task_tag
                                   ])
                # TODO: remove es_urls parameter when Scale no longer supports old style job types

                # Post task needs ElasticSearch URL to grab logs for old artifact registration
                es_param = DockerParameter(
                    'env', 'ELASTICSEARCH_URL=%s' % settings.ELASTICSEARCH_URL)
                config.add_to_task('post', docker_params=[es_param])
            main_task_tag = DockerParameter(
                'log-opt', 'tag=%s|%s|%s|%s|%s' %
                (config.get_task_id('main'), job_type.name, job_type.version,
                 job_exe.job_id, job_exe.exe_num))
            config.add_to_task('main',
                               docker_params=[
                                   log_driver, fluent_precision, log_address,
                                   main_task_tag
                               ])