Exemplo n.º 1
0
	def _get_internal(self, desc, obj2str, str2obj, def2obj, option, default_obj,
			interactive=True, interactive_msg=None, interactive_msg_append_default=True, **kwargs):
		"""Query a config option, letting the user override the code default interactively.

		If `interactive_msg` is set and `self.is_interactive(option, interactive)` is true,
		the user is prompted until the input is either empty (keep `default_obj`) or can be
		parsed by `str2obj` (the parsed value replaces `default_obj`). The resulting default
		is handed to `TypedConfigInterface._get_internal` together with `kwargs`, and the
		result of that call is returned.

		The process exits with `os.EX_DATAERR` if the prompt itself raises an exception.
		"""
		# interactive mode only overrides default values from the code
		uii = UserInputInterface()
		if interactive_msg and self.is_interactive(option, interactive):
			prompt = interactive_msg
			if interactive_msg_append_default and not unspecified(default_obj):
				prompt += (' [%s]' % self._get_default_str(default_obj, def2obj, obj2str))
			while True:
				# Install the default SIGINT action while the prompt is shown
				handler = signal.signal(signal.SIGINT, signal.SIG_DFL)
				try:
					user_input = uii.prompt_text('%s: ' % prompt)
				except Exception:
					sys.exit(os.EX_DATAERR)
				finally:
					# Restore the previous handler on every path (including the exit above);
					# None means the old handler was not installed from Python and
					# therefore can't be passed back to signal.signal
					if handler is not None:
						signal.signal(signal.SIGINT, handler)
				if user_input != '':
					try:
						default_obj = str2obj(user_input)
					except Exception:
						clear_current_exception()
						self._log.warning('Unable to parse %s: %s\n', desc, user_input)
						continue  # ask again until the input can be parsed
				break
		return TypedConfigInterface._get_internal(self, desc, obj2str, str2obj, def2obj,
			option, default_obj, **kwargs)
Exemplo n.º 2
0
	def _check_map_name2key(self, map_key2name, map_key2metadata_dict):
		"""Report names that are shared by several keys and optionally ask whether to go on.

		Keys of length 1 are treated as 'dataset' keys, all other keys as 'block' keys.
		For every name that is mapped to more than one key of the same type, the candidate
		keys and the matching entries of their metadata (from `map_key2metadata_dict`) are
		logged as warnings. If `self._interactive_assignment` is set and a collision was
		found, the user is asked whether to continue; declining exits with `os.EX_OK`.
		"""
		# Find name <-> key collisions
		map_type2name2key_list = {}
		for (key, name) in map_key2name.items():
			if len(key) == 1:
				key_type = 'dataset'
			else:
				key_type = 'block'
			map_type2name2key_list.setdefault(key_type, {}).setdefault(name, []).append(key)
		collision = False
		# Metadata variables shown for each key type
		map_key_type2vn_list = {
			'dataset': self._hash_input_set_dataset,
			'block': self._hash_input_set_dataset + self._hash_input_set_block
		}
		for (key_type, vn_list) in map_key_type2vn_list.items():
			for (name, key_list) in map_type2name2key_list.get(key_type, {}).items():
				if len(key_list) > 1:
					# Logger.warn is a deprecated alias - use Logger.warning
					self._log.warning('Multiple %s keys are mapped to the name %s!', key_type, repr(name))
					for idx, key in enumerate(sorted(key_list)):
						self._log.warning('\tCandidate #%d with key %r:', idx + 1, str.join('#', key))
						metadata_dict = map_key2metadata_dict[key]
						for (vn, value) in filter_dict(metadata_dict, key_filter=vn_list.__contains__).items():
							self._log.warning('\t\t%s = %s', vn, value)
					collision = True
		if self._interactive_assignment and collision:
			if not UserInputInterface().prompt_bool('Do you want to continue?', False):
				sys.exit(os.EX_OK)
Exemplo n.º 3
0
 def do_transfer(self, desc_source_target_list):
     """Copy each (desc, source, target) entry to every configured storage path.

     For each distinct entry of `self._storage_paths`, `source` is copied to
     `target` below that path via `se_copy`; the copy gets up to five minutes
     before it is terminated. If a copy fails, the process output is logged
     and the user is asked whether the file is available anyway.

     Raises:
         ConfigError: if no storage path is configured.
         StorageError: if a copy failed and the user does not confirm that
             the file is available on the storage path.
     """
     for (desc, source, target) in desc_source_target_list:
         if not self._storage_paths:
             # The option name is quoted as a whole: '<channel> path'
             raise ConfigError(
                 "%s can't be transferred because '%s path' wasn't set" %
                 (desc, self._storage_channel))
         for idx, se_path in enumerate(set(self._storage_paths)):
             activity = Activity('Copy %s to SE %d ' % (desc, idx + 1))
             try:
                 proc = se_copy(source, os.path.join(se_path, target),
                                self._storage_force)
                 proc.status(timeout=5 * 60, terminate=True)
             finally:
                 # Finish the activity even if starting or waiting for the
                 # copy process raises
                 activity.finish()
             if proc.status(timeout=0) == 0:
                 self._log.info('Copy %s to SE %d finished', desc, idx + 1)
             else:
                 self._log.info('Copy %s to SE %d failed', desc, idx + 1)
                 self._log.log_process(proc)
                 self._log.critical(
                     'Unable to copy %s! You can try to copy it manually.',
                     desc)
                 msg = 'Is %s (%s) available on SE %s?' % (desc, source,
                                                           se_path)
                 if not UserInputInterface().prompt_bool(msg, False):
                     raise StorageError('%s is missing on SE %s!' %
                                        (desc, se_path))
Exemplo n.º 4
0
	def __init__(self, config, source):
		"""Set up the adapter and bring the stored job <-> parameter mapping up to date.

		Depending on the 'init' / 'resync' config states, the existence of the mapping files
		in the work directory and the hash of the parameter source, the mapping is written
		from scratch, resynced, or read back from disk. The 'parameter hash' config entry
		is only updated in the first two cases.
		"""
		self._psrc_raw = source
		BasicParameterAdapter.__init__(self, config, source)
		self._map_jobnum2pnum = {}
		work_dn = config.get_work_path()
		ensure_dir_exists(work_dn, 'parameter storage directory', ParameterError)
		self._path_jobnum2pnum = config.get_work_path('params.map.gz')
		self._path_params = config.get_work_path('params.dat.gz')

		# Find out if init should be performed - overrides resync_requested!
		init_requested = config.get_state('init', detail='parameters')
		# Init needed if no parameter log exists
		init_needed = not (os.path.exists(self._path_params) and
			os.path.exists(self._path_jobnum2pnum))
		if init_requested and not init_needed:
			if source.get_parameter_len() is not None:
				warn_msg = ('Re-Initialization will overwrite the current mapping '
					'between jobs and parameter/dataset content! This can lead to invalid results!')
				self._log.warning(warn_msg)
				user_msg = ('Do you want to perform a syncronization between '
					'the current mapping and the new one to avoid this?')
				if UserInputInterface().prompt_bool(user_msg, True):
					init_requested = False  # user prefers a resync over a re-init
		do_init = init_requested or init_needed

		# Find out if resync should be performed
		resync_by_user = config.get_state('resync', detail='parameters')
		config.set_state(False, 'resync', detail='parameters')
		psrc_hash = self._psrc_raw.get_psrc_hash()
		self._psrc_hash_stored = config.get('parameter hash', psrc_hash, persistent=True)
		# Resync if parameters have changed
		psrc_hash_changed = self._psrc_hash_stored != psrc_hash
		resync_by_psrc = self._psrc_raw.get_resync_request()

		if not do_init and not (resync_by_user or resync_by_psrc or psrc_hash_changed):
			# Reuse old mapping
			activity = Activity('Loading cached parameter information')
			self._read_jobnum2pnum()
			activity.finish()
			return  # do not set parameter hash in config

		if do_init:  # Write current state
			self._write_jobnum2pnum(self._path_jobnum2pnum)
			dump_cls = ParameterSource.get_class('GCDumpParameterSource')
			dump_cls.write(self._path_params,
				self.get_job_len(), self.get_job_metadata(), self.iter_jobs())
		else:  # Perform sync
			if psrc_hash_changed:
				self._log.info('Parameter hash has changed')
				self._log.debug('\told hash: %s', self._psrc_hash_stored)
				self._log.debug('\tnew hash: %s', psrc_hash)
				self._log.log(logging.DEBUG1, '\tnew src: %s', self._psrc_raw)
				config.set_state(True, 'init', detail='config')
			elif resync_by_psrc:
				self._log.info('Parameter source requested resync')
				self._log.debug('\t%r', str.join(', ', imap(repr, resync_by_psrc)))
			elif resync_by_user:
				self._log.info('User requested resync')
			self._psrc_hash_stored = None
			self._resync_state = self.resync(force=True)
		config.set('parameter hash', self._psrc_raw.get_psrc_hash())
 def _get_internal(self,
                   desc,
                   obj2str,
                   str2obj,
                   def2obj,
                   option,
                   default_obj,
                   interactive=True,
                   interactive_msg=None,
                   interactive_msg_append_default=True,
                   **kwargs):
     """Query a config option, letting the user override the code default.

     If `interactive_msg` is set and
     `self.is_interactive(option, interactive)` is true, the user is prompted
     until the input is either empty (keep `default_obj`) or can be parsed by
     `str2obj` (the parsed value replaces `default_obj`). The resulting
     default is handed to `TypedConfigInterface._get_internal` together with
     `kwargs`, and the result of that call is returned.

     The process exits with `os.EX_DATAERR` if the prompt itself raises an
     exception.
     """
     # interactive mode only overrides default values from the code
     uii = UserInputInterface()
     if interactive_msg and self.is_interactive(option, interactive):
         prompt = interactive_msg
         if interactive_msg_append_default and not unspecified(default_obj):
             prompt += (
                 ' [%s]' %
                 self._get_default_str(default_obj, def2obj, obj2str))
         while True:
             # Install the default SIGINT action while the prompt is shown
             handler = signal.signal(signal.SIGINT, signal.SIG_DFL)
             try:
                 user_input = uii.prompt_text('%s: ' % prompt)
             except Exception:
                 sys.exit(os.EX_DATAERR)
             finally:
                 # Restore the previous handler on every path (including the
                 # exit above); None means the old handler was not installed
                 # from Python and can't be passed back to signal.signal
                 if handler is not None:
                     signal.signal(signal.SIGINT, handler)
             if user_input != '':
                 try:
                     default_obj = str2obj(user_input)
                 except Exception:
                     clear_current_exception()
                     self._log.warning('Unable to parse %s: %s\n', desc,
                                       user_input)
                     continue  # ask again until the input can be parsed
             break
     return TypedConfigInterface._get_internal(self, desc, obj2str, str2obj,
                                               def2obj, option, default_obj,
                                               **kwargs)
Exemplo n.º 6
0
    def _make_jdl(self, jobnum, task):
        """Write the job config file for `jobnum` and return its formatted JDL.

        Output sandbox targets containing a '*' are passed to the job config
        as the 'GC_WC' variable and replaced by 'GC_WC.tar.gz' in the output
        sandbox. If the summed size of the input sandbox files exceeds
        `self._sb_warn_size` (in MiB, when positive), the user is asked once
        whether to continue; declining exits with `os.EX_OK`.
        """
        job_config_fn = os.path.join(self._job_dn, 'job_%d.var' % jobnum)
        in_transfer_info_list = self._get_in_transfer_info_list(task)
        sb_in_src_list = lmap(lambda info: info[1], in_transfer_info_list)
        out_transfer_info_list = self._get_out_transfer_info_list(task)
        sb_out_target_list = lmap(lambda info: info[2], out_transfer_info_list)

        # Wildcard targets are collected into a single tarball
        wildcard_list = lfilter(lambda fn: '*' in fn, sb_out_target_list)
        if wildcard_list:
            job_var_dict = {'GC_WC': str.join(' ', wildcard_list)}
            sb_out_fn_list = lfilter(lambda fn: fn not in wildcard_list,
                                     sb_out_target_list)
            sb_out_fn_list = sb_out_fn_list + ['GC_WC.tar.gz']
        else:
            job_var_dict = {}
            sb_out_fn_list = sb_out_target_list
        self._write_job_config(job_config_fn, jobnum, task, job_var_dict)

        # Warn about too large sandboxes
        sb_in_size_list = lmap(os.path.getsize, sb_in_src_list)
        if sb_in_size_list:
            sb_in_size = sum(sb_in_size_list)
            warn_enabled = self._sb_warn_size > 0
            if warn_enabled and (sb_in_size >
                                 self._sb_warn_size * 1024 * 1024):
                user_msg = 'Sandbox is very large (%d bytes) and can cause issues with the WMS!' % sb_in_size
                user_msg = user_msg + ' Do you want to continue?'
                if not UserInputInterface().prompt_bool(user_msg, False):
                    sys.exit(os.EX_OK)
                # Setting the limit to zero disables further warnings
                self._sb_warn_size = 0

        reqs = self._broker_site.broker(task.get_requirement_list(jobnum),
                                        WMS.SITES)

        def _format_str_list(str_list):
            # Renders a list of strings as: { "a", "b" }
            quoted_iter = imap(lambda value: '"%s"' % value, str_list)
            return '{ %s }' % str.join(', ', quoted_iter)

        contents = {}
        contents['Executable'] = '"gc-run.sh"'
        contents['Arguments'] = '"%d"' % jobnum
        contents['StdOutput'] = '"gc.stdout"'
        contents['StdError'] = '"gc.stderr"'
        contents['InputSandbox'] = _format_str_list(sb_in_src_list +
                                                    [job_config_fn])
        contents['OutputSandbox'] = _format_str_list(sb_out_fn_list)
        contents['VirtualOrganisation'] = '"%s"' % self._vo
        contents['Rank'] = '-other.GlueCEStateEstimatedResponseTime'
        contents['RetryCount'] = 2
        return self._jdl_writer.format(reqs, contents)
Exemplo n.º 7
0
	def __init__(self, config, name, task):
		"""Read the job manager settings from `config` and create the helper plugins.

		The option values and plugins are queried with `on_change=None`. If no local
		event handler plugin is configured, a plain `LocalEventHandler(None, '')` is used.
		"""
		NamedPlugin.__init__(self, config, name)

		def _get_int(option, default):
			return config.get_int(option, default, on_change=None)

		def _get_time(option, default):
			return config.get_time(option, default, on_change=None)

		def _get_plugin(option, default, **plugin_kwargs):
			return config.get_plugin(option, default, on_change=None, **plugin_kwargs)

		event_handler = config.get_composited_plugin(
			['local monitor', 'local event handler'], 'logmonitor', 'MultiLocalEventHandler',
			cls=LocalEventHandler, bind_kwargs={'tags': [self, task]},
			require_plugin=False, on_change=None)
		if not event_handler:
			event_handler = LocalEventHandler(None, '')
		self._local_event_handler = event_handler
		self._log = logging.getLogger('jobs.manager')

		# Limits on the number of jobs
		self._njobs_limit = _get_int('jobs', -1)
		self._njobs_inflight = _get_int('in flight', -1)
		self._njobs_inqueue = _get_int('in queue', -1)

		# Chunk sizes for the submit / check / retrieve steps
		self._chunks_enabled = config.get_bool('chunks enabled', True, on_change=None)
		self._chunks_submit = _get_int('chunks submit', 100)
		self._chunks_check = _get_int('chunks check', 100)
		self._chunks_retrieve = _get_int('chunks retrieve', 100)

		# Timeouts and retries
		self._timeout_unknown = _get_time('unknown timeout', -1)
		self._timeout_queue = _get_time('queue timeout', -1)
		self._job_retries = _get_int('max retry', -1)

		selector_expr = config.get('selected', '', on_change=None)
		selected = JobSelector.create(selector_expr, task=task)
		max_jobs = self._get_max_jobs(task)  # needs self._njobs_limit and self._log
		self.job_db = _get_plugin('job database', 'TextFileJobDB',
			cls=JobDB, pargs=(max_jobs, selected))
		self._disabled_jobs_logfile = config.get_work_path('disabled')
		self._output_processor = _get_plugin('output processor', 'SandboxProcessor',
			cls=TaskOutputProcessor)

		self._uii = UserInputInterface()
		self._interactive_cancel = config.is_interactive(['delete jobs', 'cancel jobs'], True)
		self._interactive_reset = config.is_interactive('reset jobs', True)
		self._do_shuffle = config.get_bool('shuffle', False, on_change=None)
		self._abort_report = _get_plugin('abort report', 'LocationReport',
			cls=Report, pargs=(self.job_db, task))
		self._show_blocker = True
		self._callback_list = []
Exemplo n.º 8
0
class JobManager(NamedPlugin):  # pylint:disable=too-many-instance-attributes
	"""Manage the job life cycle (submit, check, retrieve, cancel, reset) using a job database."""
	config_section_list = NamedPlugin.config_section_list + ['jobs']
	config_tag_name = 'jobmgr'
	alias_list = ['NullJobManager']

	def __init__(self, config, name, task):
		"""Read the job manager settings from `config` and create the helper plugins."""
		NamedPlugin.__init__(self, config, name)
		self._local_event_handler = config.get_composited_plugin(
			['local monitor', 'local event handler'], 'logmonitor', 'MultiLocalEventHandler',
			cls=LocalEventHandler, bind_kwargs={'tags': [self, task]},
			require_plugin=False, on_change=None)
		# Fall back to a plain event handler if no plugin was configured
		self._local_event_handler = self._local_event_handler or LocalEventHandler(None, '')
		self._log = logging.getLogger('jobs.manager')

		self._njobs_limit = config.get_int('jobs', -1, on_change=None)
		self._njobs_inflight = config.get_int('in flight', -1, on_change=None)
		self._njobs_inqueue = config.get_int('in queue', -1, on_change=None)

		self._chunks_enabled = config.get_bool('chunks enabled', True, on_change=None)
		self._chunks_submit = config.get_int('chunks submit', 100, on_change=None)
		self._chunks_check = config.get_int('chunks check', 100, on_change=None)
		self._chunks_retrieve = config.get_int('chunks retrieve', 100, on_change=None)

		self._timeout_unknown = config.get_time('unknown timeout', -1, on_change=None)
		self._timeout_queue = config.get_time('queue timeout', -1, on_change=None)
		self._job_retries = config.get_int('max retry', -1, on_change=None)

		selected = JobSelector.create(config.get('selected', '', on_change=None), task=task)
		# _get_max_jobs relies on self._njobs_limit and self._log being set above
		self.job_db = config.get_plugin('job database', 'TextFileJobDB',
			cls=JobDB, pargs=(self._get_max_jobs(task), selected), on_change=None)
		self._disabled_jobs_logfile = config.get_work_path('disabled')
		self._output_processor = config.get_plugin('output processor', 'SandboxProcessor',
			cls=TaskOutputProcessor, on_change=None)

		self._uii = UserInputInterface()
		self._interactive_cancel = config.is_interactive(['delete jobs', 'cancel jobs'], True)
		self._interactive_reset = config.is_interactive('reset jobs', True)
		self._do_shuffle = config.get_bool('shuffle', False, on_change=None)
		self._abort_report = config.get_plugin('abort report', 'LocationReport',
			cls=Report, pargs=(self.job_db, task), on_change=None)
		self._show_blocker = True
		self._callback_list = []

	def add_event_handler(self, callback):
		"""Register a callable that is invoked (without arguments) after each job state change."""
		self._callback_list.append(callback)

	def cancel(self, task, wms, select):
		"""Cancel the jobs in the PROCESSING class that match the selector expression `select`."""
		selector = AndJobSelector(ClassSelector(JobClass.PROCESSING),
			JobSelector.create(select, task=task))
		jobs = self.job_db.get_job_list(selector)
		if jobs:
			self._log.warning('Cancelling the following jobs:')
			self._cancel(task, wms, jobs, interactive=self._interactive_cancel, show_jobs=True)

	def check(self, task, wms):
		"""Check a sample of the processing jobs and apply timeouts and task interventions.

		Returns False if the check was aborted, otherwise a flag telling whether
		any job state changed.
		"""
		jobnum_list = self._sample(self.job_db.get_job_list(ClassSelector(JobClass.PROCESSING)),
			self._get_chunk_size(self._chunks_check))

		# Check jobs in the jobnum_list and return changes, timeouts and successfully reported jobs
		(change, jobnum_list_timeout, reported) = self._check_get_jobnum_list(task, wms, jobnum_list)
		unreported = len(jobnum_list) - len(reported)
		if unreported > 0:
			self._log.log_time(logging.CRITICAL, '%d job(s) did not report their status!', unreported)
		if change is None:  # neither True or False => abort
			return False

		# Cancel jobs which took too long
		if len(jobnum_list_timeout):
			change = True
			self._log.warning('Timeout for the following jobs:')
			self._cancel(task, wms, jobnum_list_timeout, interactive=False, show_jobs=True)

		# Process task interventions
		self._process_intervention(task, wms)

		# Quit when all jobs are finished
		if self.job_db.get_job_len(ClassSelector(JobClass.ENDSTATE)) == len(self.job_db):
			self._log_disabled_jobs()
			if task.can_finish():
				self._local_event_handler.on_task_finish(task, len(self.job_db))
				abort(True)

		return change

	def finish(self):
		"""Notify the local event handler that the workflow has finished."""
		self._local_event_handler.on_workflow_finish()

	def remove_event_handler(self, callback):
		"""Unregister a callable that was added with `add_event_handler`."""
		self._callback_list.remove(callback)

	def reset(self, task, wms, select):
		"""Reset the state of the jobs matching the selector expression `select`.

		The affected jobs are shown first. Jobs that are still processing are cancelled
		and every selected job is replaced by a fresh `Job()` in the job database.
		"""
		jobnum_list = self.job_db.get_job_list(JobSelector.create(select, task=task))
		if jobnum_list:
			self._log.warning('Resetting the following jobs:')
			self._abort_report.show_report(self.job_db, jobnum_list)
			ask_user_msg = 'Are you sure you want to reset the state of these jobs?'
			# Only ask for confirmation in interactive mode (same convention as in _cancel);
			# the previous condition was inverted and prompted only when NOT interactive
			if (not self._interactive_reset) or self._uii.prompt_bool(ask_user_msg, False):
				self._cancel(task, wms, self.job_db.get_job_list(
					ClassSelector(JobClass.PROCESSING), jobnum_list), interactive=False, show_jobs=False)
				for jobnum in jobnum_list:
					self.job_db.commit(jobnum, Job())

	def retrieve(self, task, wms):
		"""Retrieve the output of a sample of the DONE jobs and update their state.

		Exit code 0 maps to SUCCESS, 107 to ABORTED and everything else to FAILED;
		a successful job whose output can't be processed is marked FAILED with code 108.
		Returns False if aborted, otherwise whether any job state changed.
		"""
		change = False
		jobnum_list = self._sample(self.job_db.get_job_list(ClassSelector(JobClass.DONE)),
			self._get_chunk_size(self._chunks_retrieve))

		job_output_iter = wms.retrieve_jobs(self._get_wms_args(jobnum_list))
		for (jobnum, exit_code, data, outputdir) in job_output_iter:
			job_obj = self.job_db.get_job(jobnum)
			if job_obj is None:
				continue

			if exit_code == 0:
				state = Job.SUCCESS
			elif exit_code == 107:  # set ABORTED instead of FAILED for errorcode 107
				state = Job.ABORTED
			else:
				state = Job.FAILED

			if state == Job.SUCCESS:
				if not self._output_processor.process(outputdir, task):
					exit_code = 108
					state = Job.FAILED

			if state != job_obj.state:
				change = True
				job_obj.set('retcode', exit_code)
				job_obj.set('runtime', data.get('TIME', -1))
				self._update(task, job_obj, jobnum, state)
				self._local_event_handler.on_job_output(task, wms, job_obj, jobnum, exit_code)

			if abort():
				return False

		return change

	def submit(self, task, wms):
		"""Submit the next batch of jobs; return True if at least one job was handled.

		Jobs without a `gc_id` from the WMS are marked FAILED. Returns False if there
		is nothing to submit or the submission was aborted.
		"""
		jobnum_list = self._submit_get_jobs(task)
		if len(jobnum_list) == 0:
			return False

		submitted = []
		for (jobnum, gc_id, data) in wms.submit_jobs(jobnum_list, task):
			submitted.append(jobnum)
			job_obj = self.job_db.get_job_persistent(jobnum)
			job_obj.clear_old_state()

			if gc_id is None:
				# Could not register at WMS
				self._update(task, job_obj, jobnum, Job.FAILED)
				continue

			job_obj.assign_id(gc_id)
			for (key, value) in data.items():
				job_obj.set(key, value)

			self._update(task, job_obj, jobnum, Job.SUBMITTED)
			self._local_event_handler.on_job_submit(task, wms, job_obj, jobnum)
			if abort():
				return False
		return len(submitted) != 0

	def _cancel(self, task, wms, jobnum_list, interactive, show_jobs):
		"""Cancel the given jobs at the WMS and mark them as CANCELLED.

		With `show_jobs` the jobs are reported first; with `interactive` the user has to
		confirm the cancellation (and the marking of jobs the WMS failed to cancel).
		"""
		if len(jobnum_list) == 0:
			return
		if show_jobs:
			self._abort_report.show_report(self.job_db, jobnum_list)
		if interactive and not self._uii.prompt_bool('Do you really want to cancel these jobs?', True):
			return

		def _mark_cancelled(jobnum):
			job_obj = self.job_db.get_job(jobnum)
			if job_obj is not None:
				self._update(task, job_obj, jobnum, Job.CANCELLED)
				self._local_event_handler.on_job_update(task, wms, job_obj, jobnum, {'reason': 'cancelled'})

		# Work on a reversed copy so that the list of the caller is not modified
		jobnum_list = list(reversed(jobnum_list))
		map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
		# Cancel jobs in order of descending job number
		gc_id_list = sorted(map_gc_id2jobnum, key=lambda gc_id: -map_gc_id2jobnum[gc_id])
		for (gc_id,) in wms.cancel_jobs(gc_id_list):
			# Remove cancelled job from todo list and mark as cancelled
			_mark_cancelled(map_gc_id2jobnum.pop(gc_id))

		if map_gc_id2jobnum:  # jobs left over => WMS did not confirm the cancellation
			jobnum_list = list(map_gc_id2jobnum.values())
			self._log.warning('There was a problem with cancelling the following jobs:')
			self._abort_report.show_report(self.job_db, jobnum_list)
			if (not interactive) or self._uii.prompt_bool('Do you want to mark them as cancelled?', True):
				for jobnum in jobnum_list:
					_mark_cancelled(jobnum)
		if interactive:
			wait(2)

	def _check_get_jobnum_list(self, task, wms, jobnum_list):
		"""Query the state of the given jobs and update changed jobs.

		Returns the tuple (change, jobnum_list_timeout, reported); `change` is None
		if the check was aborted.
		"""
		(change, jobnum_list_timeout, reported) = (False, [], [])
		if not jobnum_list:
			return (change, jobnum_list_timeout, reported)
		for (jobnum, job_obj, state, info) in self._check_jobs_raw(wms, jobnum_list):
			if state != Job.UNKNOWN:
				reported.append(jobnum)
			if state != job_obj.state:
				change = True
				for (key, value) in info.items():
					job_obj.set(key, value)
				self._update(task, job_obj, jobnum, state)
				self._local_event_handler.on_job_update(task, wms, job_obj, jobnum, info)
			else:
				# If a job stays too long in an inital state, cancel it
				if job_obj.state in (Job.SUBMITTED, Job.WAITING, Job.READY, Job.QUEUED):
					if self._timeout_queue > 0 and time.time() - job_obj.submitted > self._timeout_queue:
						jobnum_list_timeout.append(jobnum)
				if job_obj.state == Job.UNKNOWN:
					if self._timeout_unknown > 0 and time.time() - job_obj.submitted > self._timeout_unknown:
						jobnum_list_timeout.append(jobnum)
			if abort():
				return (None, jobnum_list_timeout, reported)
		return (change, jobnum_list_timeout, reported)

	def _check_jobs_raw(self, wms, jobnum_list):
		"""Ask the WMS about the given jobs and yield (jobnum, job_obj, job_state, job_info)."""
		# ask wms and yield (jobnum, job_obj, job_status, job_info)
		map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
		# Pass a snapshot of the keys: entries are popped from the dict while the WMS
		# may still be iterating over its input (a live key view would break in that case)
		for (gc_id, job_state, job_info) in wms.check_jobs(list(map_gc_id2jobnum.keys())):
			if not abort():
				jobnum = map_gc_id2jobnum.pop(gc_id, None)
				if jobnum is not None:
					yield (jobnum, self.job_db.get_job(jobnum), job_state, job_info)
		for jobnum in map_gc_id2jobnum.values():  # missing jobs are returned with Job.UNKNOWN state
			yield (jobnum, self.job_db.get_job(jobnum), Job.UNKNOWN, {})

	def _get_chunk_size(self, user_size, default=-1):
		"""Return `user_size` if chunking is enabled and the size is positive, else `default`."""
		if self._chunks_enabled and (user_size > 0):
			return user_size
		return default

	def _get_enabled_jobs(self, task, jobnum_list_ready):
		"""Select the ready jobs that may be submitted and sync the DISABLED state.

		Returns (n_mod_ok, n_retry_ok, jobnum_list_enabled): the number of jobs accepted
		by the task, the number of jobs with retries left and the jobs fulfilling both.
		"""
		(n_mod_ok, n_retry_ok, jobnum_list_enabled) = (0, 0, [])
		for jobnum in jobnum_list_ready:
			job_obj = self.job_db.get_job_transient(jobnum)
			can_retry = (self._job_retries < 0) or (job_obj.attempt - 1 < self._job_retries)
			can_submit = task.can_submit(jobnum)
			if can_retry:
				n_retry_ok += 1
			if can_submit:
				n_mod_ok += 1
			if can_submit and can_retry:
				jobnum_list_enabled.append(jobnum)
			if can_submit and (job_obj.state == Job.DISABLED):  # recover jobs
				self._update(task, job_obj, jobnum, Job.INIT, reason='reenabled by task module')
			elif not can_submit and (job_obj.state != Job.DISABLED):  # disable invalid jobs
				self._update(task, self.job_db.get_job_persistent(jobnum),
					jobnum, Job.DISABLED, reason='disabled by task module')
		return (n_mod_ok, n_retry_ok, jobnum_list_enabled)

	def _get_map_gc_id_jobnum(self, jobnum_list):
		"""Return a dict that maps the `gc_id` of each given job to its job number."""
		return dict(imap(lambda jobnum: (self.job_db.get_job(jobnum).gc_id, jobnum), jobnum_list))

	def _get_max_jobs(self, task):
		"""Return the number of jobs to run, combining the user limit and the task size.

		Raises ConfigError if neither the task nor the user specifies a number of jobs.
		"""
		njobs_user = self._njobs_limit
		njobs_task = task.get_job_len()
		if njobs_task is None:  # Task module doesn't define a maximum number of jobs
			if njobs_user < 0:  # User didn't specify a maximum number of jobs
				raise ConfigError('Task module doesn\'t provide max number of Jobs. ' +
					'User specified number of jobs needed!')
			return njobs_user  # Run user specified number of jobs
		if njobs_user < 0:  # No user specified limit => run all jobs
			return njobs_task
		njobs_min = min(njobs_user, njobs_task)
		if njobs_user < njobs_task:
			self._log.warning('Maximum number of jobs in task (%d) was truncated to %d',
				njobs_task, njobs_min)
		return njobs_min

	def _get_wms_args(self, jobnum_list):
		"""Return a list of (gc_id, jobnum) tuples for the given job numbers."""
		return lmap(lambda jobnum: (self.job_db.get_job(jobnum).gc_id, jobnum), jobnum_list)

	def _log_disabled_jobs(self):
		"""Write the numbers of all disabled jobs to the log file and report their count.

		Raises JobError if the file can't be written.
		"""
		disabled = self.job_db.get_job_list(ClassSelector(JobClass.DISABLED))
		try:
			with_file(SafeFile(self._disabled_jobs_logfile, 'w'),
				lambda fp: fp.write(str.join('\n', imap(str, disabled))))
		except Exception:
			raise JobError('Could not write disabled jobs to file %s!' % self._disabled_jobs_logfile)
		if disabled:
			self._log.log_time(logging.WARNING, 'There are %d disabled jobs in this task!', len(disabled))
			self._log.log_time(logging.DEBUG,
				'Please refer to %s for a complete list of disabled jobs.', self._disabled_jobs_logfile)

	def _process_intervention(self, task, wms):
		"""Apply the changes requested by `task.get_intervention()` to the job database.

		The intervention consists of jobs to redo, jobs to disable and a size change flag.
		Raises JobError if a job is not in a state that allows the requested reset.
		"""
		# Process changes of job states requested by task module
		resetable_state_list = [Job.INIT, Job.DISABLED, Job.ABORTED,
			Job.CANCELLED, Job.DONE, Job.FAILED, Job.SUCCESS]

		def _reset_state(jobnum_list, state_new):
			jobnum_set_todo = set(jobnum_list)  # jobs that could not be reset (yet)
			for jobnum in jobnum_list:
				job_obj = self.job_db.get_job_persistent(jobnum)
				if job_obj.state in resetable_state_list:
					self._update(task, job_obj, jobnum, state_new)
					jobnum_set_todo.remove(jobnum)
					job_obj.attempt = 0

			if len(jobnum_set_todo) > 0:
				raise JobError('For the following jobs it was not possible to reset the state to %s:\n%s' % (
					Job.enum2str(state_new), str.join(', ', imap(str, jobnum_set_todo))))

		(redo, disable, size_change) = task.get_intervention()
		if (not redo) and (not disable) and (not size_change):
			return
		self._log.log_time(logging.INFO, 'The task module has requested changes to the job database')
		max_job_len_new = self._get_max_jobs(task)
		applied_change = False
		if max_job_len_new != len(self.job_db):
			self._log.log_time(logging.INFO,
				'Number of jobs changed from %d to %d', len(self.job_db), max_job_len_new)
			self.job_db.set_job_limit(max_job_len_new)
			applied_change = True
		if redo:
			self._cancel(task, wms, self.job_db.get_job_list(
				ClassSelector(JobClass.PROCESSING), redo), interactive=False, show_jobs=True)
			_reset_state(redo, Job.INIT)
			applied_change = True
		if disable:
			self._cancel(task, wms, self.job_db.get_job_list(
				ClassSelector(JobClass.PROCESSING), disable), interactive=False, show_jobs=True)
			_reset_state(disable, Job.DISABLED)
			applied_change = True
		if applied_change:
			self._log.log_time(logging.INFO, 'All requested changes are applied')

	def _sample(self, jobnum_list, size):
		"""Return the sorted job list, reduced to a random sample of `size` jobs if size >= 0."""
		if size >= 0:
			jobnum_list = random.sample(jobnum_list, min(size, len(jobnum_list)))
		return sorted(jobnum_list)

	def _submit_get_jobs(self, task):
		"""Return the job numbers to submit next, honoring the queue / in-flight / chunk limits.

		If there are submission candidates but none of them is enabled, the reason is
		logged once until the situation changes.
		"""
		# Get list of submittable jobs
		jobnum_list_ready = self.job_db.get_job_list(ClassSelector(JobClass.SUBMIT_CANDIDATES))
		(n_mod_ok, n_retry_ok, jobnum_list) = self._get_enabled_jobs(task, jobnum_list_ready)

		if self._show_blocker and jobnum_list_ready and not jobnum_list:  # No submission but ready jobs
			err_str_list = []
			if (n_retry_ok <= 0) or (n_mod_ok != 0):
				err_str_list.append('have hit their maximum number of retries')
			# Mirror of the condition above ('or', not 'and'): otherwise both reasons can
			# never be reported together and the delimiter selection below has no effect
			if (n_retry_ok != 0) or (n_mod_ok <= 0):
				err_str_list.append('are vetoed by the task module')
			err_delim = ' and '
			if n_retry_ok or n_mod_ok:
				err_delim = ' or '
			self._log.log_time(logging.WARNING, 'All remaining jobs %s!', str.join(err_delim, err_str_list))
		self._show_blocker = not (len(jobnum_list_ready) > 0 and len(jobnum_list) == 0)

		# Determine number of jobs to submit
		submit = len(jobnum_list)
		if self._njobs_inqueue > 0:
			submit = min(submit, self._njobs_inqueue - self.job_db.get_job_len(
				ClassSelector(JobClass.ATWMS)))
		if self._njobs_inflight > 0:
			submit = min(submit, self._njobs_inflight - self.job_db.get_job_len(
				ClassSelector(JobClass.PROCESSING)))
		if self._chunks_enabled and (self._chunks_submit > 0):
			submit = min(submit, self._chunks_submit)
		submit = max(submit, 0)

		if self._do_shuffle:
			return self._sample(jobnum_list, submit)
		return sorted(jobnum_list)[:submit]

	def _update(self, task, job_obj, jobnum, new_state, show_wms=False, reason=None):
		"""Switch a job to `new_state`, commit it and notify the handlers.

		Nothing happens if the job is already in `new_state`. `show_wms` is not used here.
		"""
		old_state = job_obj.state
		if old_state != new_state:
			job_obj.update(new_state)
			self.job_db.commit(jobnum, job_obj)
			self._local_event_handler.on_job_state_change(task, len(self.job_db), jobnum, job_obj,
				old_state, new_state, reason)
			for callback in self._callback_list:
				callback()
Exemplo n.º 9
0
    def __init__(self, config, name, task):
        """Read the job manager settings from `config` and create the plugins.

        The option values and plugins are queried with `on_change=None`. If no
        local event handler plugin is configured, a plain
        `LocalEventHandler(None, '')` is used.
        """
        NamedPlugin.__init__(self, config, name)

        def _get_int(option, default):
            return config.get_int(option, default, on_change=None)

        def _get_time(option, default):
            return config.get_time(option, default, on_change=None)

        def _get_plugin(option, default, **plugin_kwargs):
            return config.get_plugin(option, default, on_change=None,
                                     **plugin_kwargs)

        event_handler = config.get_composited_plugin(
            ['local monitor', 'local event handler'],
            'logmonitor',
            'MultiLocalEventHandler',
            cls=LocalEventHandler,
            bind_kwargs={'tags': [self, task]},
            require_plugin=False,
            on_change=None)
        if not event_handler:
            event_handler = LocalEventHandler(None, '')
        self._local_event_handler = event_handler
        self._log = logging.getLogger('jobs.manager')

        # Limits on the number of jobs
        self._njobs_limit = _get_int('jobs', -1)
        self._njobs_inflight = _get_int('in flight', -1)
        self._njobs_inqueue = _get_int('in queue', -1)

        # Chunk sizes for the submit / check / retrieve steps
        self._chunks_enabled = config.get_bool('chunks enabled', True,
                                               on_change=None)
        self._chunks_submit = _get_int('chunks submit', 100)
        self._chunks_check = _get_int('chunks check', 100)
        self._chunks_retrieve = _get_int('chunks retrieve', 100)

        # Timeouts and retries
        self._timeout_unknown = _get_time('unknown timeout', -1)
        self._timeout_queue = _get_time('queue timeout', -1)
        self._job_retries = _get_int('max retry', -1)

        selector_expr = config.get('selected', '', on_change=None)
        selected = JobSelector.create(selector_expr, task=task)
        # _get_max_jobs needs self._njobs_limit and self._log
        max_jobs = self._get_max_jobs(task)
        self.job_db = _get_plugin('job database', 'TextFileJobDB',
                                  cls=JobDB, pargs=(max_jobs, selected))
        self._disabled_jobs_logfile = config.get_work_path('disabled')
        self._output_processor = _get_plugin('output processor',
                                             'SandboxProcessor',
                                             cls=TaskOutputProcessor)

        self._uii = UserInputInterface()
        self._interactive_cancel = config.is_interactive(
            ['delete jobs', 'cancel jobs'], True)
        self._interactive_reset = config.is_interactive('reset jobs', True)
        self._do_shuffle = config.get_bool('shuffle', False, on_change=None)
        self._abort_report = _get_plugin('abort report', 'LocationReport',
                                         cls=Report,
                                         pargs=(self.job_db, task))
        self._show_blocker = True
        self._callback_list = []
Exemplo n.º 10
0
class JobManager(NamedPlugin):  # pylint:disable=too-many-instance-attributes
    """Drive the jobs of a task through a workload management system (WMS).

    Offers submit / check / retrieve / cancel / reset operations. Job objects
    are stored in ``self.job_db``; every state change is reported to the
    local event handler and to the callbacks registered with
    ``add_event_handler``.
    """
    # Config sections: the ones inherited from NamedPlugin plus 'jobs'
    config_section_list = NamedPlugin.config_section_list + ['jobs']
    config_tag_name = 'jobmgr'
    alias_list = ['NullJobManager']

    def __init__(self, config, name, task):
        """Read the job manager settings from ``config`` and create helpers.

        ``config`` supplies job limits, chunk sizes, timeouts, the job
        database, the output processor and the abort report. ``task`` is
        handed to the job selector, used to size the job database and bound
        to the event handler / report plugins.
        """
        NamedPlugin.__init__(self, config, name)
        self._local_event_handler = config.get_composited_plugin(
            ['local monitor', 'local event handler'],
            'logmonitor',
            'MultiLocalEventHandler',
            cls=LocalEventHandler,
            bind_kwargs={'tags': [self, task]},
            require_plugin=False,
            on_change=None)
        # Fall back to a plain LocalEventHandler if the lookup gave a falsy result
        self._local_event_handler = self._local_event_handler or LocalEventHandler(
            None, '')
        self._log = logging.getLogger('jobs.manager')

        # Job count limits - negative / non-positive values are treated as
        # "no limit" by _get_max_jobs and _submit_get_jobs
        self._njobs_limit = config.get_int('jobs', -1, on_change=None)
        self._njobs_inflight = config.get_int('in flight', -1, on_change=None)
        self._njobs_inqueue = config.get_int('in queue', -1, on_change=None)

        # Chunk sizes for submit / check / retrieve (see _get_chunk_size)
        self._chunks_enabled = config.get_bool('chunks enabled',
                                               True,
                                               on_change=None)
        self._chunks_submit = config.get_int('chunks submit',
                                             100,
                                             on_change=None)
        self._chunks_check = config.get_int('chunks check',
                                            100,
                                            on_change=None)
        self._chunks_retrieve = config.get_int('chunks retrieve',
                                               100,
                                               on_change=None)

        # Timeouts are only enforced when > 0 (see _check_get_jobnum_list)
        self._timeout_unknown = config.get_time('unknown timeout',
                                                -1,
                                                on_change=None)
        self._timeout_queue = config.get_time('queue timeout',
                                              -1,
                                              on_change=None)
        # A negative value disables the retry limit (see _get_enabled_jobs)
        self._job_retries = config.get_int('max retry', -1, on_change=None)

        selected = JobSelector.create(config.get('selected',
                                                 '',
                                                 on_change=None),
                                      task=task)
        # _get_max_jobs reads self._njobs_limit and may log via self._log,
        # so both have to be set before this point
        self.job_db = config.get_plugin('job database',
                                        'TextFileJobDB',
                                        cls=JobDB,
                                        pargs=(self._get_max_jobs(task),
                                               selected),
                                        on_change=None)
        # File that receives the list of disabled jobs (see _log_disabled_jobs)
        self._disabled_jobs_logfile = config.get_work_path('disabled')
        self._output_processor = config.get_plugin('output processor',
                                                   'SandboxProcessor',
                                                   cls=TaskOutputProcessor,
                                                   on_change=None)

        self._uii = UserInputInterface()
        self._interactive_cancel = config.is_interactive(
            ['delete jobs', 'cancel jobs'], True)
        self._interactive_reset = config.is_interactive('reset jobs', True)
        self._do_shuffle = config.get_bool('shuffle', False, on_change=None)
        self._abort_report = config.get_plugin('abort report',
                                               'LocationReport',
                                               cls=Report,
                                               pargs=(self.job_db, task),
                                               on_change=None)
        # Cleared by _submit_get_jobs after the "all jobs blocked" warning so
        # that the warning is not repeated on every submission cycle
        self._show_blocker = True
        # Callbacks invoked (without arguments) after each job state change
        self._callback_list = []

    def add_event_handler(self, callback):
        self._callback_list.append(callback)

    def cancel(self, task, wms, select):
        """Cancel every processing job matched by the selector expression ``select``."""
        # Only jobs that are currently processing AND match the user selection
        processing_selector = ClassSelector(JobClass.PROCESSING)
        user_selector = JobSelector.create(select, task=task)
        jobnum_list = self.job_db.get_job_list(
            AndJobSelector(processing_selector, user_selector))
        if not jobnum_list:
            return
        self._log.warning('Cancelling the following jobs:')
        self._cancel(task, wms, jobnum_list,
                     interactive=self._interactive_cancel, show_jobs=True)

    def check(self, task, wms):
        """Query the WMS for the state of (a sample of) the processing jobs.

        Timed-out jobs are cancelled, interventions requested by the task are
        applied and - once every job reached an end state - the task is
        finished. Returns False if the check was aborted, otherwise whether
        any job state changed (cancelling timed-out jobs counts as change).
        """
        processing_list = self.job_db.get_job_list(
            ClassSelector(JobClass.PROCESSING))
        jobnum_list = self._sample(processing_list,
                                   self._get_chunk_size(self._chunks_check))

        # Collect state changes, timed-out jobs and the jobs that reported back
        check_result = self._check_get_jobnum_list(task, wms, jobnum_list)
        (change, jobnum_list_timeout, reported) = check_result
        n_unreported = len(jobnum_list) - len(reported)
        if n_unreported > 0:
            self._log.log_time(logging.CRITICAL,
                               '%d job(s) did not report their status!',
                               n_unreported)
        if change is None:  # None (neither True nor False) signals an abort
            return False

        # Jobs that exceeded their timeout are cancelled without asking
        if jobnum_list_timeout:
            change = True
            self._log.warning('Timeout for the following jobs:')
            self._cancel(task, wms, jobnum_list_timeout,
                         interactive=False, show_jobs=True)

        # Apply job state changes requested by the task module
        self._process_intervention(task, wms)

        # Finish the task as soon as every job is in an end state
        n_endstate = self.job_db.get_job_len(ClassSelector(JobClass.ENDSTATE))
        if n_endstate == len(self.job_db):
            self._log_disabled_jobs()
            if task.can_finish():
                self._local_event_handler.on_task_finish(
                    task, len(self.job_db))
                abort(True)

        return change

    def finish(self):
        self._local_event_handler.on_workflow_finish()

    def remove_event_handler(self, callback):
        self._callback_list.remove(callback)

    def reset(self, task, wms, select):
        """Reset all jobs matched by the selector expression ``select``.

        The affected jobs are shown to the user. In interactive mode the user
        has to confirm the reset (default answer: no); in non-interactive
        mode the reset is applied without a prompt - the same convention
        ``_cancel`` follows for its ``interactive`` flag. Jobs that are still
        processing are cancelled first, then every selected job is replaced
        by a fresh ``Job()`` in the job database.
        """
        jobnum_list = self.job_db.get_job_list(
            JobSelector.create(select, task=task))
        if not jobnum_list:
            return
        self._log.warning('Resetting the following jobs:')
        self._abort_report.show_report(self.job_db, jobnum_list)
        ask_user_msg = 'Are you sure you want to reset the state of these jobs?'
        # Only prompt when interactive; the previous condition was inverted
        # (it prompted in non-interactive mode and skipped the confirmation
        # in interactive mode)
        if self._interactive_reset and not self._uii.prompt_bool(
                ask_user_msg, False):
            return
        # Jobs still known to the WMS have to be cancelled before the reset;
        # the user was already asked (if necessary), so don't ask again
        self._cancel(task,
                     wms,
                     self.job_db.get_job_list(
                         ClassSelector(JobClass.PROCESSING), jobnum_list),
                     interactive=False,
                     show_jobs=False)
        for jobnum in jobnum_list:
            self.job_db.commit(jobnum, Job())

    def retrieve(self, task, wms):
        """Fetch the output of (a sample of) the jobs in the DONE class.

        The exit code determines the new job state; the output of successful
        jobs is additionally run through the output processor. Returns False
        if aborted, otherwise whether any job changed its state.
        """
        done_list = self.job_db.get_job_list(ClassSelector(JobClass.DONE))
        jobnum_list = self._sample(done_list,
                                   self._get_chunk_size(self._chunks_retrieve))

        change = False
        wms_args = self._get_wms_args(jobnum_list)
        for (jobnum, exit_code, data, outputdir) in wms.retrieve_jobs(wms_args):
            job_obj = self.job_db.get_job(jobnum)
            if job_obj is None:
                continue

            if exit_code == 0:
                if self._output_processor.process(outputdir, task):
                    state = Job.SUCCESS
                else:
                    # output processing failed => job fails with code 108
                    (exit_code, state) = (108, Job.FAILED)
            elif exit_code == 107:
                # error code 107 is recorded as ABORTED instead of FAILED
                state = Job.ABORTED
            else:
                state = Job.FAILED

            if state != job_obj.state:
                change = True
                job_obj.set('retcode', exit_code)
                job_obj.set('runtime', data.get('TIME', -1))
                self._update(task, job_obj, jobnum, state)
                self._local_event_handler.on_job_output(
                    task, wms, job_obj, jobnum, exit_code)

            if abort():
                return False

        return change

    def submit(self, task, wms):
        """Submit the next batch of jobs to the WMS.

        Returns True if the WMS reported at least one job and the submission
        was not aborted, False otherwise.
        """
        jobnum_list = self._submit_get_jobs(task)
        if not jobnum_list:
            return False

        n_reported = 0
        for (jobnum, gc_id, data) in wms.submit_jobs(jobnum_list, task):
            n_reported += 1
            job_obj = self.job_db.get_job_persistent(jobnum)
            job_obj.clear_old_state()

            if gc_id is not None:
                job_obj.assign_id(gc_id)
                for (key, value) in data.items():
                    job_obj.set(key, value)

                self._update(task, job_obj, jobnum, Job.SUBMITTED)
                self._local_event_handler.on_job_submit(
                    task, wms, job_obj, jobnum)
                if abort():
                    return False
            else:
                # Job could not be registered at the WMS
                self._update(task, job_obj, jobnum, Job.FAILED)
        return n_reported > 0

    def _cancel(self, task, wms, jobnum_list, interactive, show_jobs):
        """Cancel the given jobs at the WMS and mark them as cancelled.

        task, wms   -- passed on to the state update / event handler calls
        jobnum_list -- job numbers to cancel; the list is NOT modified
        interactive -- ask for confirmation before cancelling and before
                       marking jobs as cancelled that the WMS did not confirm
        show_jobs   -- show a report of the affected jobs first
        """
        if not jobnum_list:
            return
        if show_jobs:
            self._abort_report.show_report(self.job_db, jobnum_list)
        if interactive and not self._uii.prompt_bool(
                'Do you really want to cancel these jobs?', True):
            return

        def _mark_cancelled(jobnum):
            job_obj = self.job_db.get_job(jobnum)
            if job_obj is not None:
                self._update(task, job_obj, jobnum, Job.CANCELLED)
                self._local_event_handler.on_job_update(
                    task, wms, job_obj, jobnum, {'reason': 'cancelled'})

        # Build the map from a reversed *copy*: same lookup result as the
        # former in-place reverse(), but the caller's list stays untouched
        map_gc_id2jobnum = self._get_map_gc_id_jobnum(
            list(reversed(jobnum_list)))
        gc_id_list = sorted(map_gc_id2jobnum,
                            key=lambda gc_id: -map_gc_id2jobnum[gc_id])
        for (gc_id, ) in wms.cancel_jobs(gc_id_list):
            # Remove cancelled job from todo list and mark as cancelled.
            # Unknown or repeated ids are skipped (as in _check_jobs_raw)
            # instead of aborting the whole cancellation with a KeyError
            jobnum = map_gc_id2jobnum.pop(gc_id, None)
            if jobnum is not None:
                _mark_cancelled(jobnum)

        if map_gc_id2jobnum:
            # Jobs the WMS did not confirm as cancelled
            jobnum_list_failed = list(map_gc_id2jobnum.values())
            self._log.warning(
                'There was a problem with cancelling the following jobs:')
            self._abort_report.show_report(self.job_db, jobnum_list_failed)
            if (not interactive) or self._uii.prompt_bool(
                    'Do you want to mark them as cancelled?', True):
                for jobnum in jobnum_list_failed:
                    _mark_cancelled(jobnum)
        if interactive:
            wait(2)

    def _check_get_jobnum_list(self, task, wms, jobnum_list):
        (change, jobnum_list_timeout, reported) = (False, [], [])
        if not jobnum_list:
            return (change, jobnum_list_timeout, reported)
        for (jobnum, job_obj, state,
             info) in self._check_jobs_raw(wms, jobnum_list):
            if state != Job.UNKNOWN:
                reported.append(jobnum)
            if state != job_obj.state:
                change = True
                for (key, value) in info.items():
                    job_obj.set(key, value)
                self._update(task, job_obj, jobnum, state)
                self._local_event_handler.on_job_update(
                    task, wms, job_obj, jobnum, info)
            else:
                # If a job stays too long in an inital state, cancel it
                if job_obj.state in (Job.SUBMITTED, Job.WAITING, Job.READY,
                                     Job.QUEUED):
                    if self._timeout_queue > 0 and time.time(
                    ) - job_obj.submitted > self._timeout_queue:
                        jobnum_list_timeout.append(jobnum)
                if job_obj.state == Job.UNKNOWN:
                    if self._timeout_unknown > 0 and time.time(
                    ) - job_obj.submitted > self._timeout_unknown:
                        jobnum_list_timeout.append(jobnum)
            if abort():
                return (None, jobnum_list_timeout, reported)
        return (change, jobnum_list_timeout, reported)

    def _check_jobs_raw(self, wms, jobnum_list):
        """Ask the WMS about the given jobs.

        Yields (jobnum, job_obj, job_state, job_info) for every job reported
        by the WMS (reports are dropped once abort() is set) and afterwards
        (jobnum, job_obj, Job.UNKNOWN, {}) for every job without report.
        """
        map_gc_id2jobnum = self._get_map_gc_id_jobnum(jobnum_list)
        # Hand over a snapshot of the ids: entries are popped from the map
        # while the WMS iterator is running, and a live keys() view would
        # raise "dictionary changed size during iteration" on Python 3 if
        # the WMS walks its argument lazily
        gc_id_list = list(map_gc_id2jobnum)
        for (gc_id, job_state, job_info) in wms.check_jobs(gc_id_list):
            if abort():
                continue
            jobnum = map_gc_id2jobnum.pop(gc_id, None)
            if jobnum is not None:
                yield (jobnum, self.job_db.get_job(jobnum), job_state,
                       job_info)
        # missing jobs are returned with Job.UNKNOWN state
        for jobnum in map_gc_id2jobnum.values():
            yield (jobnum, self.job_db.get_job(jobnum), Job.UNKNOWN, {})

    def _get_chunk_size(self, user_size, default=-1):
        if self._chunks_enabled and (user_size > 0):
            return user_size
        return default

    def _get_enabled_jobs(self, task, jobnum_list_ready):
        (n_mod_ok, n_retry_ok, jobnum_list_enabled) = (0, 0, [])
        for jobnum in jobnum_list_ready:
            job_obj = self.job_db.get_job_transient(jobnum)
            can_retry = (self._job_retries < 0) or (job_obj.attempt - 1 <
                                                    self._job_retries)
            can_submit = task.can_submit(jobnum)
            if can_retry:
                n_retry_ok += 1
            if can_submit:
                n_mod_ok += 1
            if can_submit and can_retry:
                jobnum_list_enabled.append(jobnum)
            if can_submit and (job_obj.state == Job.DISABLED):  # recover jobs
                self._update(task,
                             job_obj,
                             jobnum,
                             Job.INIT,
                             reason='reenabled by task module')
            elif not can_submit and (job_obj.state !=
                                     Job.DISABLED):  # disable invalid jobs
                self._update(task,
                             self.job_db.get_job_persistent(jobnum),
                             jobnum,
                             Job.DISABLED,
                             reason='disabled by task module')
        return (n_mod_ok, n_retry_ok, jobnum_list_enabled)

    def _get_map_gc_id_jobnum(self, jobnum_list):
        """Return a dict that maps the gc_id of each given job to its job number."""
        def _get_gc_id_jobnum(jobnum):
            return (self.job_db.get_job(jobnum).gc_id, jobnum)

        return dict(imap(_get_gc_id_jobnum, jobnum_list))

    def _get_max_jobs(self, task):
        njobs_user = self._njobs_limit
        njobs_task = task.get_job_len()
        if njobs_task is None:  # Task module doesn't define a maximum number of jobs
            if njobs_user < 0:  # User didn't specify a maximum number of jobs
                raise ConfigError(
                    'Task module doesn\'t provide max number of Jobs. ' +
                    'User specified number of jobs needed!')
            elif njobs_user >= 0:  # Run user specified number of jobs
                return njobs_user
        if njobs_user < 0:  # No user specified limit => run all jobs
            return njobs_task
        njobs_min = min(njobs_user, njobs_task)
        if njobs_user < njobs_task:
            self._log.warning(
                'Maximum number of jobs in task (%d) was truncated to %d',
                njobs_task, njobs_min)
        return njobs_min

    def _get_wms_args(self, jobnum_list):
        """Return the list of (gc_id, jobnum) tuples for the given job numbers."""
        def _get_gc_id_jobnum(jobnum):
            return (self.job_db.get_job(jobnum).gc_id, jobnum)

        return lmap(_get_gc_id_jobnum, jobnum_list)

    def _log_disabled_jobs(self):
        """Write the numbers of all disabled jobs to the 'disabled' log file.

        The file is written even if no job is disabled; the log messages are
        only shown if there are disabled jobs. Raises JobError if the file
        could not be written.
        """
        disabled = self.job_db.get_job_list(ClassSelector(JobClass.DISABLED))

        def _write_jobnum_list(fp):
            # one job number per line
            return fp.write(str.join('\n', imap(str, disabled)))

        try:
            with_file(SafeFile(self._disabled_jobs_logfile, 'w'),
                      _write_jobnum_list)
        except Exception:
            raise JobError('Could not write disabled jobs to file %s!' %
                           self._disabled_jobs_logfile)
        if not disabled:
            return
        self._log.log_time(logging.WARNING,
                           'There are %d disabled jobs in this task!',
                           len(disabled))
        self._log.log_time(
            logging.DEBUG,
            'Please refer to %s for a complete list of disabled jobs.',
            self._disabled_jobs_logfile)

    def _process_intervention(self, task, wms):
        """Apply the job database changes requested by the task module.

        ``task.get_intervention()`` provides the jobs to redo, the jobs to
        disable and a flag for a changed number of jobs. Jobs to redo /
        disable are cancelled if they are still processing and then set to
        INIT / DISABLED. Raises JobError for jobs whose state can't be reset.
        """
        resetable_state_list = [
            Job.INIT, Job.DISABLED, Job.ABORTED, Job.CANCELLED, Job.DONE,
            Job.FAILED, Job.SUCCESS
        ]

        def _reset_state(jobnum_list, state_new):
            jobnum_set_pending = set(jobnum_list)
            for jobnum in jobnum_list:
                job_obj = self.job_db.get_job_persistent(jobnum)
                if job_obj.state not in resetable_state_list:
                    continue
                # keep this order: update the state first, then clear the
                # attempt counter
                self._update(task, job_obj, jobnum, state_new)
                jobnum_set_pending.remove(jobnum)
                job_obj.attempt = 0

            if jobnum_set_pending:
                raise JobError(
                    'For the following jobs it was not possible to reset the state to %s:\n%s'
                    % (Job.enum2str(state_new),
                       str.join(', ', imap(str, jobnum_set_pending))))

        def _cancel_and_reset(jobnum_list, state_new):
            # jobs still processing have to be cancelled before the reset
            jobnum_list_processing = self.job_db.get_job_list(
                ClassSelector(JobClass.PROCESSING), jobnum_list)
            self._cancel(task, wms, jobnum_list_processing,
                         interactive=False, show_jobs=True)
            _reset_state(jobnum_list, state_new)

        (redo, disable, size_change) = task.get_intervention()
        if not (redo or disable or size_change):
            return
        self._log.log_time(
            logging.INFO,
            'The task module has requested changes to the job database')
        applied_change = False
        max_job_len_new = self._get_max_jobs(task)
        max_job_len_old = len(self.job_db)
        if max_job_len_new != max_job_len_old:
            self._log.log_time(logging.INFO,
                               'Number of jobs changed from %d to %d',
                               max_job_len_old, max_job_len_new)
            self.job_db.set_job_limit(max_job_len_new)
            applied_change = True
        if redo:
            _cancel_and_reset(redo, Job.INIT)
            applied_change = True
        if disable:
            _cancel_and_reset(disable, Job.DISABLED)
            applied_change = True
        if applied_change:
            self._log.log_time(logging.INFO,
                               'All requested changes are applied')

    def _sample(self, jobnum_list, size):
        if size >= 0:
            jobnum_list = random.sample(jobnum_list,
                                        min(size, len(jobnum_list)))
        return sorted(jobnum_list)

    def _submit_get_jobs(self, task):
        """Return the sorted list of job numbers for the next submission.

        The submission candidates are filtered by ``_get_enabled_jobs`` and
        truncated according to the 'in queue', 'in flight' and submit chunk
        limits. A warning is logged once if candidates exist, but none of
        them can be submitted.
        """
        # Get list of submittable jobs
        jobnum_list_ready = self.job_db.get_job_list(
            ClassSelector(JobClass.SUBMIT_CANDIDATES))
        enabled_result = self._get_enabled_jobs(task, jobnum_list_ready)
        (n_mod_ok, n_retry_ok, jobnum_list) = enabled_result

        # There are ready jobs, but none of them can be submitted
        is_blocked = (len(jobnum_list_ready) > 0) and (len(jobnum_list) == 0)
        if self._show_blocker and is_blocked:
            err_str_list = []
            if (n_retry_ok <= 0) or (n_mod_ok != 0):
                err_str_list.append('have hit their maximum number of retries')
            if (n_retry_ok != 0) and (n_mod_ok <= 0):
                err_str_list.append('are vetoed by the task module')
            if n_retry_ok or n_mod_ok:
                err_delim = ' or '
            else:
                err_delim = ' and '
            self._log.log_time(logging.WARNING, 'All remaining jobs %s!',
                               str.join(err_delim, err_str_list))
        # Show the warning only once per blocked period
        self._show_blocker = not is_blocked

        # Determine number of jobs to submit: smallest of all active limits
        limit_list = [len(jobnum_list)]
        if self._njobs_inqueue > 0:
            n_atwms = self.job_db.get_job_len(ClassSelector(JobClass.ATWMS))
            limit_list.append(self._njobs_inqueue - n_atwms)
        if self._njobs_inflight > 0:
            n_processing = self.job_db.get_job_len(
                ClassSelector(JobClass.PROCESSING))
            limit_list.append(self._njobs_inflight - n_processing)
        if self._chunks_enabled and (self._chunks_submit > 0):
            limit_list.append(self._chunks_submit)
        submit = max(min(limit_list), 0)

        if self._do_shuffle:
            return self._sample(jobnum_list, submit)
        return sorted(jobnum_list)[:submit]

    def _update(self,
                task,
                job_obj,
                jobnum,
                new_state,
                show_wms=False,
                reason=None):
        old_state = job_obj.state
        if old_state != new_state:
            job_obj.update(new_state)
            self.job_db.commit(jobnum, job_obj)
            self._local_event_handler.on_job_state_change(
                task, len(self.job_db), jobnum, job_obj, old_state, new_state,
                reason)
            for callback in self._callback_list:
                callback()
Exemplo n.º 11
0
    def __init__(self, config, name):
        """Configure a CMSSW task.

        Sets CMSSW specific config values, runs the SCRAMTask setup, reads
        the prolog / epilog and config file settings and - if the sandbox has
        to be initialized - creates the project area tarball.
        """
        # These settings are written before SCRAMTask.__init__ runs
        # (presumably so the base class picks them up - confirm against SCRAMTask)
        config.set('se input timeout', '0:30')
        config.set('application', 'cmsRun', section='dashboard')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        config.set('partition processor', 'BasicPartitionProcessor', '-=')
        config.set(
            'partition processor',
            'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor',
            '+=')

        self._needed_vn_set = set()
        SCRAMTask.__init__(self, config, name)
        self._uii = UserInputInterface()

        # Set up file path information
        self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
        if self._do_gzip_std_output:
            self._cmsrun_output_files.append('cmssw.log.gz')
        self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
                                  path_abs=get_path_share(
                                      'gc-run.cmssw.sh',
                                      pkg='grid_control_cms'))

        if self._scram_project != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

        # RELEASETOP entry of the SCRAM environment file in the project area
        # (stays None if there is no project area)
        self._old_release_top = None
        if self._project_area:
            scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
                                               self._scram_arch, 'Environment')
            self._old_release_top = self._parse_scram_file(
                scram_arch_env_path).get('RELEASETOP')

        self._update_map_error_code2msg(
            get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

        self._project_area_tarball_on_se = config.get_bool(
            ['se runtime', 'se project area'], True)
        self._project_area_tarball = config.get_work_path(
            'cmssw-project-area.tar.gz')

        # Prolog / Epilog script support - reject the old syntax
        # (a plain 'executable' option without prolog / epilog prefix)
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        if config.get_fn_list('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if not self._has_dataset:
            self._events_per_job = config.get('events per job', '0')
            # this can be a variable like @USER_EVENTS@!
            self._needed_vn_set.add('MAX_EVENTS')
        fragment = config.get_fn(
            'instrumentation fragment',
            get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
        self._config_fn_list = self._process_config_file_list(
            config,
            config.get_fn_list('config file', self._get_config_file_default()),
            fragment,
            auto_prepare=config.get_bool('instrumentation', True),
            must_prepare=self._has_dataset)

        # Create project area tarball
        # A missing tarball forces the 'init' state for the sandbox
        if self._project_area and not os.path.exists(
                self._project_area_tarball):
            config.set_state(True, 'init', detail='sandbox')
        # Information about search order for software environment
        self._cmssw_search_dict = self._get_cmssw_path_list(config)
        if config.get_state('init', detail='sandbox'):
            msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
            # Keeping an existing tarball ends the constructor right here -
            # the 'storage' init state below is not set in that case
            if os.path.exists(
                    self._project_area_tarball) and not self._uii.prompt_bool(
                        msg, True):
                return
            # Generate CMSSW tarball
            if self._project_area:
                create_tarball(_match_files(
                    self._project_area, self._project_area_matcher,
                    self._always_matcher.create_matcher(''),
                    self._project_area_base_fn),
                               name=self._project_area_tarball)
            if self._project_area_tarball_on_se:
                config.set_state(True, 'init', detail='storage')
Exemplo n.º 12
0
class CMSSW(SCRAMTask):
    """SCRAMTask specialisation for CMSSW projects.

    Jobs are started through the 'gc-run.cmssw.sh' run script and use
    'cmsRun' as executable (see get_command / get_job_dict).
    """
    # NOTE(review): presumably the plugin alias list used by the plugin loader - confirm
    alias_list = ['']
    # Config sections of the parent class, extended by the 'CMSSW' section
    config_section_list = SCRAMTask.config_section_list + ['CMSSW']

    def __init__(self, config, name):
        """Set up a CMSSW task.

        Applies CMSSW specific settings to *config*, initialises the
        SCRAMTask base class, processes the list of cmsRun config files via
        _process_config_file_list and creates the project area tarball when
        the 'init' / 'sandbox' state is set.

        :param config: config interface that is queried and modified here
        :param name: task name, handed on to SCRAMTask.__init__
        :raises ConfigError: if the SCRAM project is not 'CMSSW' or if the
            unprefixed 'executable' option is set
        """
        # Config values that are set before SCRAMTask.__init__ runs
        config.set('se input timeout', '0:30')
        config.set('application', 'cmsRun', section='dashboard')
        config.set('dataset provider', 'DBS3Provider')
        config.set('dataset splitter', 'EventBoundarySplitter')
        config.set('dataset processor', 'LumiDataProcessor', '+=')
        config.set('partition processor', 'BasicPartitionProcessor', '-=')
        config.set(
            'partition processor',
            'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor',
            '+=')

        # Created before the base constructor runs - presumably because
        # SCRAMTask.__init__ ends up calling _create_datasource, which updates
        # this set (TODO confirm)
        self._needed_vn_set = set()
        SCRAMTask.__init__(self, config, name)
        self._uii = UserInputInterface()

        # Setup file path information
        self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
        if self._do_gzip_std_output:
            self._cmsrun_output_files.append('cmssw.log.gz')
        self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
                                  path_abs=get_path_share(
                                      'gc-run.cmssw.sh',
                                      pkg='grid_control_cms'))

        if self._scram_project != 'CMSSW':
            raise ConfigError('Project area contains no CMSSW project')

        # RELEASETOP is read from the SCRAM environment file of the project area
        # and stays None when no project area is configured
        self._old_release_top = None
        if self._project_area:
            scram_arch_env_path = os.path.join(self._project_area, '.SCRAM',
                                               self._scram_arch, 'Environment')
            self._old_release_top = self._parse_scram_file(
                scram_arch_env_path).get('RELEASETOP')

        self._update_map_error_code2msg(
            get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

        self._project_area_tarball_on_se = config.get_bool(
            ['se runtime', 'se project area'], True)
        self._project_area_tarball = config.get_work_path(
            'cmssw-project-area.tar.gz')

        # Prolog / Epilog script support - warn about old syntax
        self.prolog = TaskExecutableWrapper(config, 'prolog', '')
        self.epilog = TaskExecutableWrapper(config, 'epilog', '')
        if config.get_fn_list('executable', []) != []:
            raise ConfigError(
                'Prefix executable and argument options with either prolog or epilog!'
            )
        self.arguments = config.get('arguments', '')

        # Get cmssw config files and check their existence
        # Check that for dataset jobs the necessary placeholders are in the config file
        if not self._has_dataset:
            self._events_per_job = config.get('events per job', '0')
            # this can be a variable like @USER_EVENTS@!
            self._needed_vn_set.add('MAX_EVENTS')
        fragment = config.get_fn(
            'instrumentation fragment',
            get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
        self._config_fn_list = self._process_config_file_list(
            config,
            config.get_fn_list('config file', self._get_config_file_default()),
            fragment,
            auto_prepare=config.get_bool('instrumentation', True),
            must_prepare=self._has_dataset)

        # Create project area tarball
        # (a missing tarball forces the 'sandbox' init state)
        if self._project_area and not os.path.exists(
                self._project_area_tarball):
            config.set_state(True, 'init', detail='sandbox')
        # Information about search order for software environment
        self._cmssw_search_dict = self._get_cmssw_path_list(config)
        if config.get_state('init', detail='sandbox'):
            msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
            # The user is only asked when a tarball is already present; declining
            # ends the constructor here, which also skips the 'storage' state below
            if os.path.exists(
                    self._project_area_tarball) and not self._uii.prompt_bool(
                        msg, True):
                return
            # Generate CMSSW tarball
            if self._project_area:
                create_tarball(_match_files(
                    self._project_area, self._project_area_matcher,
                    self._always_matcher.create_matcher(''),
                    self._project_area_base_fn),
                               name=self._project_area_tarball)
            # The 'storage' init state is set when the tarball is configured
            # to be transferred via the SE
            if self._project_area_tarball_on_se:
                config.set_state(True, 'init', detail='storage')

    def get_command(self):
        """Return the command line that starts the CMSSW run script."""
        run_script_command = './gc-run.cmssw.sh $@'
        return run_script_command

    def get_job_arguments(self, jobnum):
        """Return the SCRAMTask job arguments followed by the configured 'arguments' value."""
        base_arguments = SCRAMTask.get_job_arguments(self, jobnum)
        return base_arguments + ' ' + self.arguments

    def get_job_dict(self, jobnum):
        """Return the environment variables for job *jobnum*.

        The dictionary provided by SCRAMTask.get_job_dict is extended by the
        CMSSW specific settings (event limit, software search paths, runtime
        flags, config file names and prolog / epilog information).
        """
        env = SCRAMTask.get_job_dict(self, jobnum)
        if not self._has_dataset:
            env['MAX_EVENTS'] = self._events_per_job
        env.update(dict(self._cmssw_search_dict))

        # Flags that are only exported when the corresponding setting is enabled
        optional_flag_list = [
            (self._do_gzip_std_output, 'GZIP_OUT'),
            (self._project_area_tarball_on_se, 'SE_RUNTIME'),
            (self._project_area, 'HAS_RUNTIME'),
        ]
        for (is_enabled, flag_name) in optional_flag_list:
            if is_enabled:
                env[flag_name] = 'yes'

        env['CMSSW_EXEC'] = 'cmsRun'
        config_name_list = [os.path.basename(fn) for fn in self._config_fn_list]
        env['CMSSW_CONFIG'] = ' '.join(config_name_list)
        env['CMSSW_OLD_RELEASETOP'] = self._old_release_top

        if self.prolog.is_active():
            env.update({
                'CMSSW_PROLOG_EXEC': self.prolog.get_command(),
                'CMSSW_PROLOG_SB_IN_FILES': ' '.join(
                    [fpi.path_rel for fpi in self.prolog.get_sb_in_fpi_list()]),
                'CMSSW_PROLOG_ARGS': self.prolog.get_arguments(),
            })
        if self.epilog.is_active():
            env.update({
                'CMSSW_EPILOG_EXEC': self.epilog.get_command(),
                'CMSSW_EPILOG_SB_IN_FILES': ' '.join(
                    [fpi.path_rel for fpi in self.epilog.get_sb_in_fpi_list()]),
                'CMSSW_EPILOG_ARGS': self.epilog.get_arguments(),
            })
        return env

    def get_sb_in_fpi_list(self):
        """Return the file path infos of all files that go into the input sandbox.

        Order: files of the parent task, prolog files, epilog files, the
        config files, the project area tarball (only if it is not transferred
        via the SE) and finally the run script.
        """
        sandbox_files = SCRAMTask.get_sb_in_fpi_list(self) + self.prolog.get_sb_in_fpi_list()
        sandbox_files = sandbox_files + self.epilog.get_sb_in_fpi_list()
        sandbox_files.extend([
            Result(path_abs=fn, path_rel=os.path.basename(fn))
            for fn in self._config_fn_list])
        if self._project_area and not self._project_area_tarball_on_se:
            tarball = self._project_area_tarball
            sandbox_files.append(
                Result(path_abs=tarball, path_rel=os.path.basename(tarball)))
        sandbox_files.append(self._script_fpi)
        return sandbox_files

    def get_sb_out_fn_list(self):
        """Return the output sandbox file names.

        The cmsRun output files are only added when config files are set.
        """
        output_files = SCRAMTask.get_sb_out_fn_list(self)
        if self._config_fn_list:
            # build a new list so the list of the parent class stays untouched
            output_files = output_files + self._cmsrun_output_files
        return output_files

    def get_se_in_fn_list(self):
        """Return (description, source, target) tuples of the files transferred via SE."""
        se_files = SCRAMTask.get_se_in_fn_list(self)
        if not (self._project_area and self._project_area_tarball_on_se):
            return se_files
        tarball_entry = ('CMSSW tarball', self._project_area_tarball,
                         self._task_id + '.tar.gz')
        return se_files + [tarball_entry]

    def _config_find_uninitialized(self, config, config_file_list,
                                   auto_prepare, must_prepare):
        """Find the config files that still have to be written to the work directory.

        A file is scheduled when it has no copy in the work directory yet or
        when it should be prepared (*must_prepare* or *auto_prepare* is set and
        the examined file is not instrumented). A status table with one row per
        config file is shown via ConsoleTable.

        :param config: config interface providing get_work_path
        :param config_file_list: paths of the user supplied config files
        :param auto_prepare: prepare non-instrumented files automatically
        :param must_prepare: non-instrumented files have to be prepared
        :return: list of (source path, work directory path, do_prepare) tuples
        """
        common_path = os.path.dirname(os.path.commonprefix(config_file_list))

        config_file_list_todo = []
        config_file_status_list = []
        for cfg in config_file_list:
            cfg_new = config.get_work_path(os.path.basename(cfg))
            cfg_new_exists = os.path.exists(cfg_new)
            # An existing work directory copy is examined instead of the source file
            if cfg_new_exists:
                is_instrumented = self._config_is_instrumented(cfg_new)
            else:
                is_instrumented = self._config_is_instrumented(cfg)
            do_prepare = (must_prepare or auto_prepare) and not is_instrumented
            if do_prepare or not cfg_new_exists:
                config_file_list_todo.append((cfg, cfg_new, do_prepare))
            # common_path is always a prefix of cfg, so the display name is the
            # remainder of the path. Slicing (instead of str.split) also works
            # when the files share no directory and common_path is empty -
            # str.split would raise "ValueError: empty separator" in that case.
            cfg_display_name = cfg[len(common_path):].lstrip('/')
            config_file_status_list.append({
                1: cfg_display_name,
                2: cfg_new_exists,
                3: is_instrumented,
                4: do_prepare,
            })

        if config_file_status_list:
            # The keys 1-4 of the status entries refer to these columns
            config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
                                         (3, 'Instrumented'), (4, 'Scheduled')]
            ConsoleTable.create(config_file_status_header,
                                config_file_status_list, 'lccc')
        return config_file_list_todo

    def _config_is_instrumented(self, fn):
        """Check that the file *fn* contains a placeholder for every needed variable.

        A variable counts as present if it shows up as __NAME__ or as @NAME@.
        """
        content = SafeFile(fn).read_close()
        for var_name in self._needed_vn_set:
            has_underscore_form = ('__%s__' % var_name) in content
            has_at_form = ('@%s@' % var_name) in content
            if not (has_underscore_form or has_at_form):
                return False
        return True

    def _config_store_backup(self, source, target, fragment_path=None):
        """Write the content of *source* to *target*.

        If *fragment_path* is given, the content of that file is appended.
        """
        content = SafeFile(source).read_close()
        if not fragment_path:
            SafeFile(target, 'w').write_close(content)
            return
        self._log.info('Instrumenting... %s', os.path.basename(source))
        fragment = SafeFile(fragment_path).read_close()
        SafeFile(target, 'w').write_close(content + fragment)

    def _create_datasource(self, config, name, psrc_repository, psrc_list):
        """Create the data source via SCRAMTask and record the dataset keys it needs."""
        datasource = SCRAMTask._create_datasource(self, config, name,
                                                  psrc_repository, psrc_list)
        if datasource is None:
            return datasource
        self._needed_vn_set.update(datasource.get_needed_dataset_keys())
        return datasource

    def _get_cmssw_path_list(self, config):
        """Return (variable name, path) tuples with the CMSSW software locations.

        The user supplied directory comes first (replaced by its absolute
        local path if that exists), followed by the directory derived from
        the old release top. The locations are written to the log.
        """
        search_list = []
        user_dir = config.get(['cmssw dir', 'vo software dir'], '')
        if user_dir:
            local_dir = os.path.abspath(clean_path(user_dir))
            if os.path.exists(local_dir):
                user_dir = local_dir
            search_list.append(('CMSSW_DIR_USER', user_dir))
        if self._old_release_top:
            release_base_dir = os.path.normpath('%s/../../../../' %
                                                self._old_release_top)
            search_list.append(('CMSSW_DIR_PRO', release_base_dir))

        self._log.info(
            'Local jobs will try to use the CMSSW software located here:')
        position = 0
        for (_, location) in search_list:
            position += 1
            self._log.info(' %i) %s', position, location)
        if search_list:
            self._log.info('')
        return search_list

    def _get_config_file_default(self):
        """Return the default for the 'config file' option.

        An empty list is used when a prolog or epilog is active, otherwise
        the option is left unspecified.
        """
        wrapper_is_active = self.prolog.is_active() or self.epilog.is_active()
        if not wrapper_is_active:
            return unspecified
        return []

    def _get_var_name_list(self):
        """Return the SCRAMTask variable names, plus MAX_EVENTS for tasks without dataset."""
        var_name_list = SCRAMTask._get_var_name_list(self)
        if self._has_dataset:
            return var_name_list
        var_name_list.append('MAX_EVENTS')
        return var_name_list

    def _process_config_file_list(self, config, config_file_list,
                                  fragment_path, auto_prepare, must_prepare):
        """Write the config files to the work directory and verify the result.

        Scheduled files are copied; the fragment is appended when the file
        should be prepared and either *auto_prepare* is set or the user agrees.

        :return: list with the work directory paths of all config files
        :raises ConfigError: if a file is missing in the work directory or
            if *must_prepare* is set and a file is not instrumented
        """
        # Step 1: write all scheduled config files to the work directory
        todo_list = self._config_find_uninitialized(
            config, config_file_list, auto_prepare, must_prepare)
        for (cfg_source, cfg_target, wants_prepare) in todo_list:
            fragment_to_append = None
            if wants_prepare:
                question = 'Do you want to prepare %s for running over the dataset?' % cfg_source
                if auto_prepare or self._uii.prompt_bool(question, True):
                    fragment_to_append = fragment_path
            self._config_store_backup(cfg_source, cfg_target, fragment_to_append)

        # Step 2: check the files in the work directory
        work_config_list = []
        for cfg_source in config_file_list:
            cfg_target = config.get_work_path(os.path.basename(cfg_source))
            if not os.path.exists(cfg_target):
                raise ConfigError(
                    'Config file %r was not copied to the work directory!' %
                    cfg_source)
            if not self._config_is_instrumented(cfg_target):
                if must_prepare:
                    placeholder_list = [
                        '@%s@' % var_name
                        for var_name in sorted(self._needed_vn_set)]
                    raise ConfigError(
                        'Config file %r must use %s to work properly!' %
                        (cfg_source, ', '.join(placeholder_list)))
                if auto_prepare:
                    self._log.warning('Config file %r was not instrumented!',
                                      cfg_source)
            work_config_list.append(cfg_target)
        return work_config_list
Exemplo n.º 13
0
 def __init__(self, option):
     """Store *option* together with a new UserInputInterface instance."""
     user_input_interface = UserInputInterface()
     self._option = option
     self._uii = user_input_interface
Exemplo n.º 14
0
def deprecated(text):
    """Log a deprecation message and ask the user whether to continue.

    The content of the shared file 'fail.txt' and *text* are written to the
    'console' logger with critical severity. If the user does not confirm
    (the default answer is False), the process exits with os.EX_TEMPFAIL.
    """
    console_log = logging.getLogger('console')
    banner = SafeFile(get_path_share('fail.txt')).read_close()
    console_log.critical('\n%s\n[DEPRECATED] %s', banner, text)
    user_wants_to_continue = UserInputInterface().prompt_bool(
        'Do you want to continue?', False)
    if user_wants_to_continue:
        return
    sys.exit(os.EX_TEMPFAIL)
Exemplo n.º 15
0
	def __init__(self, config, name):
		"""Set up a CMSSW task.

		Applies CMSSW specific settings to *config*, initialises the
		SCRAMTask base class, processes the list of cmsRun config files via
		_process_config_file_list and creates the project area tarball when
		the 'init' / 'sandbox' state is set.

		:param config: config interface that is queried and modified here
		:param name: task name, handed on to SCRAMTask.__init__
		:raises ConfigError: if the SCRAM project is not 'CMSSW' or if the
			unprefixed 'executable' option is set
		"""
		# Config values that are set before SCRAMTask.__init__ runs
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')

		# Created before the base constructor runs - NOTE(review): presumably the
		# base constructor needs this set to exist already; confirm in SCRAMTask
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path information
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP is read from the SCRAM environment file of the project area
		# and stays None when no project area is configured
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM', self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(
			get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.get_fn_list('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')
			# this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball
		# (a missing tarball forces the 'sandbox' init state)
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			# The user is only asked when a tarball is already present; declining
			# ends the constructor here, which also skips the 'storage' state below
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			# The 'storage' init state is set when the tarball is configured
			# to be transferred via the SE
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')
Exemplo n.º 16
0
class CMSSW(SCRAMTask):
	"""SCRAMTask specialisation for CMSSW projects.

	Jobs are started through the 'gc-run.cmssw.sh' run script and use
	'cmsRun' as executable (see get_command / get_job_dict).
	"""
	# NOTE(review): presumably the plugin alias list used by the plugin loader - confirm
	alias_list = ['']
	# Config sections of the parent class, extended by the 'CMSSW' section
	config_section_list = SCRAMTask.config_section_list + ['CMSSW']

	def __init__(self, config, name):
		"""Set up a CMSSW task.

		Applies CMSSW specific settings to *config*, initialises the
		SCRAMTask base class, processes the list of cmsRun config files via
		_process_config_file_list and creates the project area tarball when
		the 'init' / 'sandbox' state is set.

		:param config: config interface that is queried and modified here
		:param name: task name, handed on to SCRAMTask.__init__
		:raises ConfigError: if the SCRAM project is not 'CMSSW' or if the
			unprefixed 'executable' option is set
		"""
		# Config values that are set before SCRAMTask.__init__ runs
		config.set('se input timeout', '0:30')
		config.set('application', 'cmsRun', section='dashboard')
		config.set('dataset provider', 'DBS3Provider')
		config.set('dataset splitter', 'EventBoundarySplitter')
		config.set('dataset processor', 'LumiDataProcessor', '+=')
		config.set('partition processor', 'BasicPartitionProcessor', '-=')
		config.set('partition processor',
			'LFNPartitionProcessor LumiPartitionProcessor CMSSWPartitionProcessor', '+=')

		# Created before the base constructor runs - presumably because
		# SCRAMTask.__init__ ends up calling _create_datasource, which updates
		# this set (TODO confirm)
		self._needed_vn_set = set()
		SCRAMTask.__init__(self, config, name)
		self._uii = UserInputInterface()

		# Setup file path information
		self._cmsrun_output_files = ['cmssw.dbs.tar.gz']
		if self._do_gzip_std_output:
			self._cmsrun_output_files.append('cmssw.log.gz')
		self._script_fpi = Result(path_rel='gc-run.cmssw.sh',
			path_abs=get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		if self._scram_project != 'CMSSW':
			raise ConfigError('Project area contains no CMSSW project')

		# RELEASETOP is read from the SCRAM environment file of the project area
		# and stays None when no project area is configured
		self._old_release_top = None
		if self._project_area:
			scram_arch_env_path = os.path.join(self._project_area, '.SCRAM', self._scram_arch, 'Environment')
			self._old_release_top = self._parse_scram_file(scram_arch_env_path).get('RELEASETOP')

		self._update_map_error_code2msg(
			get_path_share('gc-run.cmssw.sh', pkg='grid_control_cms'))

		self._project_area_tarball_on_se = config.get_bool(['se runtime', 'se project area'], True)
		self._project_area_tarball = config.get_work_path('cmssw-project-area.tar.gz')

		# Prolog / Epilog script support - warn about old syntax
		self.prolog = TaskExecutableWrapper(config, 'prolog', '')
		self.epilog = TaskExecutableWrapper(config, 'epilog', '')
		if config.get_fn_list('executable', []) != []:
			raise ConfigError('Prefix executable and argument options with either prolog or epilog!')
		self.arguments = config.get('arguments', '')

		# Get cmssw config files and check their existence
		# Check that for dataset jobs the necessary placeholders are in the config file
		if not self._has_dataset:
			self._events_per_job = config.get('events per job', '0')
			# this can be a variable like @USER_EVENTS@!
			self._needed_vn_set.add('MAX_EVENTS')
		fragment = config.get_fn('instrumentation fragment',
			get_path_share('fragmentForCMSSW.py', pkg='grid_control_cms'))
		self._config_fn_list = self._process_config_file_list(config,
			config.get_fn_list('config file', self._get_config_file_default()),
			fragment, auto_prepare=config.get_bool('instrumentation', True),
			must_prepare=self._has_dataset)

		# Create project area tarball
		# (a missing tarball forces the 'sandbox' init state)
		if self._project_area and not os.path.exists(self._project_area_tarball):
			config.set_state(True, 'init', detail='sandbox')
		# Information about search order for software environment
		self._cmssw_search_dict = self._get_cmssw_path_list(config)
		if config.get_state('init', detail='sandbox'):
			msg = 'CMSSW tarball already exists! Do you want to regenerate it?'
			# The user is only asked when a tarball is already present; declining
			# ends the constructor here, which also skips the 'storage' state below
			if os.path.exists(self._project_area_tarball) and not self._uii.prompt_bool(msg, True):
				return
			# Generate CMSSW tarball
			if self._project_area:
				create_tarball(_match_files(self._project_area,
					self._project_area_matcher, self._always_matcher.create_matcher(''),
					self._project_area_base_fn), name=self._project_area_tarball)
			# The 'storage' init state is set when the tarball is configured
			# to be transferred via the SE
			if self._project_area_tarball_on_se:
				config.set_state(True, 'init', detail='storage')

	def get_command(self):
		"""Return the command line that starts the CMSSW run script."""
		run_script_command = './gc-run.cmssw.sh $@'
		return run_script_command

	def get_job_arguments(self, jobnum):
		"""Return the SCRAMTask job arguments followed by the configured 'arguments' value."""
		base_arguments = SCRAMTask.get_job_arguments(self, jobnum)
		return base_arguments + ' ' + self.arguments

	def get_job_dict(self, jobnum):
		"""Return the environment variables for job *jobnum*.

		The dictionary provided by SCRAMTask.get_job_dict is extended by the
		CMSSW specific settings (event limit, software search paths, runtime
		flags, config file names and prolog / epilog information).
		"""
		env = SCRAMTask.get_job_dict(self, jobnum)
		if not self._has_dataset:
			env['MAX_EVENTS'] = self._events_per_job
		env.update(dict(self._cmssw_search_dict))

		# Flags that are only exported when the corresponding setting is enabled
		optional_flag_list = [
			(self._do_gzip_std_output, 'GZIP_OUT'),
			(self._project_area_tarball_on_se, 'SE_RUNTIME'),
			(self._project_area, 'HAS_RUNTIME'),
		]
		for (is_enabled, flag_name) in optional_flag_list:
			if is_enabled:
				env[flag_name] = 'yes'

		env['CMSSW_EXEC'] = 'cmsRun'
		config_name_list = [os.path.basename(fn) for fn in self._config_fn_list]
		env['CMSSW_CONFIG'] = ' '.join(config_name_list)
		env['CMSSW_OLD_RELEASETOP'] = self._old_release_top

		if self.prolog.is_active():
			env.update({
				'CMSSW_PROLOG_EXEC': self.prolog.get_command(),
				'CMSSW_PROLOG_SB_IN_FILES': ' '.join(
					[fpi.path_rel for fpi in self.prolog.get_sb_in_fpi_list()]),
				'CMSSW_PROLOG_ARGS': self.prolog.get_arguments(),
			})
		if self.epilog.is_active():
			env.update({
				'CMSSW_EPILOG_EXEC': self.epilog.get_command(),
				'CMSSW_EPILOG_SB_IN_FILES': ' '.join(
					[fpi.path_rel for fpi in self.epilog.get_sb_in_fpi_list()]),
				'CMSSW_EPILOG_ARGS': self.epilog.get_arguments(),
			})
		return env

	def get_sb_in_fpi_list(self):
		"""Return the file path infos of all files that go into the input sandbox.

		Order: files of the parent task, prolog files, epilog files, the
		config files, the project area tarball (only if it is not transferred
		via the SE) and finally the run script.
		"""
		sandbox_files = SCRAMTask.get_sb_in_fpi_list(self) + self.prolog.get_sb_in_fpi_list()
		sandbox_files = sandbox_files + self.epilog.get_sb_in_fpi_list()
		sandbox_files.extend([
			Result(path_abs=fn, path_rel=os.path.basename(fn))
			for fn in self._config_fn_list])
		if self._project_area and not self._project_area_tarball_on_se:
			tarball = self._project_area_tarball
			sandbox_files.append(
				Result(path_abs=tarball, path_rel=os.path.basename(tarball)))
		sandbox_files.append(self._script_fpi)
		return sandbox_files

	def get_sb_out_fn_list(self):
		"""Return the output sandbox file names.

		The cmsRun output files are only added when config files are set.
		"""
		output_files = SCRAMTask.get_sb_out_fn_list(self)
		if self._config_fn_list:
			# build a new list so the list of the parent class stays untouched
			output_files = output_files + self._cmsrun_output_files
		return output_files

	def get_se_in_fn_list(self):
		"""Return (description, source, target) tuples of the files transferred via SE."""
		se_files = SCRAMTask.get_se_in_fn_list(self)
		if not (self._project_area and self._project_area_tarball_on_se):
			return se_files
		tarball_entry = ('CMSSW tarball', self._project_area_tarball,
			self._task_id + '.tar.gz')
		return se_files + [tarball_entry]

	def _config_find_uninitialized(self, config, config_file_list, auto_prepare, must_prepare):
		"""Find the config files that still have to be written to the work directory.

		A file is scheduled when it has no copy in the work directory yet or
		when it should be prepared (*must_prepare* or *auto_prepare* is set and
		the examined file is not instrumented). A status table with one row per
		config file is shown via ConsoleTable.

		:param config: config interface providing get_work_path
		:param config_file_list: paths of the user supplied config files
		:param auto_prepare: prepare non-instrumented files automatically
		:param must_prepare: non-instrumented files have to be prepared
		:return: list of (source path, work directory path, do_prepare) tuples
		"""
		common_path = os.path.dirname(os.path.commonprefix(config_file_list))

		config_file_list_todo = []
		config_file_status_list = []
		for cfg in config_file_list:
			cfg_new = config.get_work_path(os.path.basename(cfg))
			cfg_new_exists = os.path.exists(cfg_new)
			# An existing work directory copy is examined instead of the source file
			if cfg_new_exists:
				is_instrumented = self._config_is_instrumented(cfg_new)
			else:
				is_instrumented = self._config_is_instrumented(cfg)
			do_prepare = (must_prepare or auto_prepare) and not is_instrumented
			if do_prepare or not cfg_new_exists:
				config_file_list_todo.append((cfg, cfg_new, do_prepare))
			# common_path is always a prefix of cfg, so the display name is the
			# remainder of the path. Slicing (instead of str.split) also works
			# when the files share no directory and common_path is empty -
			# str.split would raise "ValueError: empty separator" in that case.
			cfg_display_name = cfg[len(common_path):].lstrip('/')
			config_file_status_list.append({1: cfg_display_name, 2: cfg_new_exists,
				3: is_instrumented, 4: do_prepare})

		if config_file_status_list:
			# The keys 1-4 of the status entries refer to these columns
			config_file_status_header = [(1, 'Config file'), (2, 'Work dir'),
				(3, 'Instrumented'), (4, 'Scheduled')]
			ConsoleTable.create(config_file_status_header, config_file_status_list, 'lccc')
		return config_file_list_todo

	def _config_is_instrumented(self, fn):
		"""Check that the file *fn* contains a placeholder for every needed variable.

		A variable counts as present if it shows up as __NAME__ or as @NAME@.
		"""
		content = SafeFile(fn).read_close()
		for var_name in self._needed_vn_set:
			has_underscore_form = ('__%s__' % var_name) in content
			has_at_form = ('@%s@' % var_name) in content
			if not (has_underscore_form or has_at_form):
				return False
		return True

	def _config_store_backup(self, source, target, fragment_path=None):
		"""Write the content of *source* to *target*.

		If *fragment_path* is given, the content of that file is appended.
		"""
		content = SafeFile(source).read_close()
		if not fragment_path:
			SafeFile(target, 'w').write_close(content)
			return
		self._log.info('Instrumenting... %s', os.path.basename(source))
		fragment = SafeFile(fragment_path).read_close()
		SafeFile(target, 'w').write_close(content + fragment)

	def _create_datasource(self, config, name, psrc_repository, psrc_list):
		"""Create the data source via SCRAMTask and record the dataset keys it needs."""
		datasource = SCRAMTask._create_datasource(self, config, name,
			psrc_repository, psrc_list)
		if datasource is None:
			return datasource
		self._needed_vn_set.update(datasource.get_needed_dataset_keys())
		return datasource

	def _get_cmssw_path_list(self, config):
		"""Return (variable name, path) tuples with the CMSSW software locations.

		The user supplied directory comes first (replaced by its absolute
		local path if that exists), followed by the directory derived from
		the old release top. The locations are written to the log.
		"""
		search_list = []
		user_dir = config.get(['cmssw dir', 'vo software dir'], '')
		if user_dir:
			local_dir = os.path.abspath(clean_path(user_dir))
			if os.path.exists(local_dir):
				user_dir = local_dir
			search_list.append(('CMSSW_DIR_USER', user_dir))
		if self._old_release_top:
			release_base_dir = os.path.normpath('%s/../../../../' % self._old_release_top)
			search_list.append(('CMSSW_DIR_PRO', release_base_dir))

		self._log.info('Local jobs will try to use the CMSSW software located here:')
		position = 0
		for (_, location) in search_list:
			position += 1
			self._log.info(' %i) %s', position, location)
		if search_list:
			self._log.info('')
		return search_list

	def _get_config_file_default(self):
		"""Return the default for the 'config file' option.

		An empty list is used when a prolog or epilog is active, otherwise
		the option is left unspecified.
		"""
		wrapper_is_active = self.prolog.is_active() or self.epilog.is_active()
		if not wrapper_is_active:
			return unspecified
		return []

	def _get_var_name_list(self):
		"""Return the SCRAMTask variable names, plus MAX_EVENTS for tasks without dataset."""
		var_name_list = SCRAMTask._get_var_name_list(self)
		if self._has_dataset:
			return var_name_list
		var_name_list.append('MAX_EVENTS')
		return var_name_list

	def _process_config_file_list(self, config, config_file_list,
			fragment_path, auto_prepare, must_prepare):
		"""Write the config files to the work directory and verify the result.

		Scheduled files are copied; the fragment is appended when the file
		should be prepared and either *auto_prepare* is set or the user agrees.

		:return: list with the work directory paths of all config files
		:raises ConfigError: if a file is missing in the work directory or
			if *must_prepare* is set and a file is not instrumented
		"""
		# Step 1: write all scheduled config files to the work directory
		todo_list = self._config_find_uninitialized(config,
			config_file_list, auto_prepare, must_prepare)
		for (cfg_source, cfg_target, wants_prepare) in todo_list:
			fragment_to_append = None
			if wants_prepare:
				question = 'Do you want to prepare %s for running over the dataset?' % cfg_source
				if auto_prepare or self._uii.prompt_bool(question, True):
					fragment_to_append = fragment_path
			self._config_store_backup(cfg_source, cfg_target, fragment_to_append)

		# Step 2: check the files in the work directory
		work_config_list = []
		for cfg_source in config_file_list:
			cfg_target = config.get_work_path(os.path.basename(cfg_source))
			if not os.path.exists(cfg_target):
				raise ConfigError('Config file %r was not copied to the work directory!' % cfg_source)
			if not self._config_is_instrumented(cfg_target):
				if must_prepare:
					placeholder_list = ['@%s@' % var_name for var_name in sorted(self._needed_vn_set)]
					raise ConfigError('Config file %r must use %s to work properly!' %
						(cfg_source, ', '.join(placeholder_list)))
				if auto_prepare:
					self._log.warning('Config file %r was not instrumented!', cfg_source)
			work_config_list.append(cfg_target)
		return work_config_list