def _configure_secrets(self, config, job_exe, job_type, interface):
    """Creates a copy of the configuration, configures secrets (masked in one of the copies), and applies any final
    configuration

    The given configuration is updated in place with secret values masked out, while the returned copy receives the
    real secret values. Both configurations otherwise receive the same settings, env vars and Docker parameters.

    :param config: The execution configuration, where the secrets will be masked out
    :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled (not referenced by this method)
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param interface: The job interface
    :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
    :returns: The copy of the execution configuration that contains the secrets
    :rtype: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
    """

    # Copy the configuration before anything secret is added, so each copy can be populated independently
    config_with_secrets = config.create_copy()

    # Configure settings values, some are secret
    if job_type.is_system:
        config.add_to_task('main', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('main', settings=self._system_settings)
    else:
        config.add_to_task('pre', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('pre', settings=self._system_settings)
        config.add_to_task('post', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('post', settings=self._system_settings)

        job_config = job_type.get_job_configuration()
        secret_settings = secrets_mgr.retrieve_job_type_secrets(job_type.get_secrets_key())
        for _config, secrets_hidden in [(config, True), (config_with_secrets, False)]:
            task_settings = {}
            # TODO: use better interface method once we switch to Seed
            for setting in interface.get_dict()['settings']:
                name = setting['name']
                if setting['secret']:
                    value = None
                    if name in secret_settings:
                        value = secret_settings[name]
                        # Only the masked configuration hides the actual secret value
                        if value is not None and secrets_hidden:
                            value = '*****'
                else:
                    value = job_config.get_setting_value(name)
                # Required settings are always recorded, even when no value is available
                if setting['required'] or value is not None:
                    task_settings[name] = value

            # TODO: command args and env var replacement from the interface should be removed once Scale drops
            # support for old-style job types
            # Read the args from the configuration being updated. Reading them from the masked configuration on
            # every pass would hand the secrets copy args whose placeholders were already replaced by the mask.
            args = _config._get_task_dict('main')['args']
            args = JobInterface._replace_command_parameters(args, task_settings)
            env_vars = interface.populate_env_vars_arguments(task_settings)
            _config.add_to_task('main', args=args, env_vars=env_vars, settings=task_settings)

    # Configure env vars for settings
    for _config in [config, config_with_secrets]:
        for task_type in _config.get_task_types():
            env_vars = {}
            for name, value in _config.get_settings(task_type).items():
                if value is not None:
                    env_vars[normalize_env_var_name(name)] = value
            _config.add_to_task(task_type, env_vars=env_vars)

    # Configure Docker parameters for env vars and Docker volumes
    for _config in [config, config_with_secrets]:
        # Tracks the volume names already emitted for an earlier task of this configuration
        existing_volumes = set()
        for task_type in _config.get_task_types():
            docker_params = []
            for name, value in _config.get_env_vars(task_type).items():
                docker_params.append(DockerParameter('env', '%s=%s' % (name, value)))
            for name, volume in _config.get_volumes(task_type).items():
                docker_params.append(volume.to_docker_param(is_created=(name in existing_volumes)))
                existing_volumes.add(name)
            _config.add_to_task(task_type, docker_params=docker_params)

    # TODO: this feature should be removed once Scale drops support for job type docker params
    # Configure docker parameters listed in job type
    if job_type.docker_params:
        docker_params = [DockerParameter(key, value) for key, value in job_type.docker_params.items()]
        if docker_params:
            config.add_to_task('main', docker_params=docker_params)
            config_with_secrets.add_to_task('main', docker_params=docker_params)

    return config_with_secrets
def configure_queued_job(self, job):
    """Builds and returns the execution configuration for a queued job. The job model passed in is expected to have
    its related job_type and job_type_rev models populated.

    :param job: The queued job model
    :type job: :class:`job.models.Job`
    :returns: The execution configuration for the queued job
    :rtype: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
    """

    config = ExecutionConfiguration()
    data = job.get_job_data()

    # Attach the meta-data of the job's input files
    input_files_dict = self._create_input_file_dict(data)
    config.set_input_files(input_files_dict)

    # TODO: refactor after Seed upgrade
    # Every input named by the interface starts out blank, so inputs without a provided value are still replaced
    env_vars = {}
    input_values = dict((declared['name'], '') for declared in job.get_job_interface().definition['input_data'])

    # TODO: refactor this to use JobData method after Seed upgrade
    for provided in data.get_dict()['input_data']:
        input_name = provided['name']
        env_var_name = normalize_env_var_name(input_name)

        if 'value' in provided:
            env_vars[env_var_name] = provided['value']
            input_values[input_name] = provided['value']

        if 'file_id' in provided:
            # A single file input resolves to the path of that file under the input directory
            input_file = input_files_dict[input_name][0]
            workspace_file_name = os.path.basename(input_file.workspace_path)
            if input_file.local_file_name:
                file_name = input_file.local_file_name
            else:
                file_name = workspace_file_name
            input_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name, file_name)
            env_vars[env_var_name] = input_path
            input_values[input_name] = input_path
        elif 'file_ids' in provided:
            # A multi-file input resolves to the directory named after the input
            input_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name)
            env_vars[env_var_name] = input_path
            input_values[input_name] = input_path

    if job.job_type.is_system:
        # Add any workspaces needed for this system job
        task_workspaces = QueuedExecutionConfigurator._system_job_workspaces(job)
    else:
        task_workspaces = {}
        # Set any output workspaces needed
        # TODO: In the future, output workspaces can be moved from job data to configuration, moving this step to
        # the ScheduledExecutionConfigurator
        self._cache_workspace_names(data.get_output_workspace_ids())
        output_workspaces = dict((output, self._cached_workspace_names[workspace_id])
                                 for output, workspace_id in data.get_output_workspaces().items())
        config.set_output_workspaces(output_workspaces)

    # Create main task with fields populated from input data
    # TODO: command arg input param replacement can be removed when old-style job type support is dropped
    main_args = JobInterface._replace_command_parameters(job.get_job_interface().get_command_args(), input_values)
    config.create_tasks(['main'])
    config.add_to_task('main', args=main_args, env_vars=env_vars, workspaces=task_workspaces)

    return config
def _configure_regular_job(config, job_exe, job_type):
    """Sets up the given execution as a regular (non-system) job: creates the pull, pre, main and post tasks and
    gives them their workspaces, input/output mounts, output directory values and resources

    :param config: The execution configuration
    :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    # NOTE(review): this function takes no self argument; presumably it is declared as a static method - confirm

    config.create_tasks(['pull', 'pre', 'main', 'post'])
    config.add_to_task('pull', args=create_pull_command(job_type.docker_image))
    exe_env_vars = {'SCALE_JOB_ID': unicode(job_exe.job_id), 'SCALE_EXE_NUM': unicode(job_exe.exe_num)}
    config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS, env_vars=exe_env_vars)
    config.add_to_task('post', args=POST_TASK_COMMAND_ARGS, env_vars=exe_env_vars)

    # Configure input workspaces
    input_names = list(config.get_input_workspace_names())
    read_only_inputs = dict((name, TaskWorkspace(name, MODE_RO)) for name in input_names)
    read_write_inputs = dict((name, TaskWorkspace(name, MODE_RW)) for name in input_names)
    for task_type in ('pre', 'main'):
        config.add_to_task(task_type, workspaces=read_only_inputs)
    # Post tasks have access to input workspaces in case input files need moved as part of parse results
    config.add_to_task('post', workspaces=read_write_inputs)

    # Configure output workspaces
    writable_outputs = dict((name, TaskWorkspace(name, MODE_RW)) for name in config.get_output_workspace_names())
    config.add_to_task('post', workspaces=writable_outputs)

    # Configure input/output mounts
    input_mnt_name = 'scale_input_mount'
    output_mnt_name = 'scale_output_mount'
    input_vol_name = get_job_exe_input_vol_name(job_exe)
    output_vol_name = get_job_exe_output_vol_name(job_exe)
    input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
    input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
    output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
    output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)
    mounts_per_task = (
        ('pre', {input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw}),
        ('main', {input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw}),
        ('post', {output_mnt_name: output_vol_ro}),
    )
    for task_type, mounts in mounts_per_task:
        config.add_to_task(task_type, mount_volumes=mounts)

    # Configure output directory
    # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
    # job types
    output_dir_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
    main_args = JobInterface._replace_command_parameters(config._get_task_dict('main')['args'], output_dir_vars)
    config.add_to_task('main', args=main_args, env_vars=output_dir_vars)

    # Configure task resources; the same resources object is reduced step by step, so the order below matters
    resources = job_exe.get_resources()
    # Pull-task and pre-task require full amount of resources
    for task_type in ('pull', 'pre'):
        config.add_to_task(task_type, resources=resources)
    # Main-task no longer requires the input file space
    input_disk = NodeResources([Disk(job_exe.input_file_size)])
    resources.subtract(input_disk)
    config.add_to_task('main', resources=resources)
    # Post-task no longer requires any disk space
    resources.remove_resource('disk')
    config.add_to_task('post', resources=resources)