Пример #1
0
class DashboardLocal(LocalEventHandler):
	# Event handler that publishes job lifecycle information (submission,
	# status updates, output retrieval) to the CMS dashboard monitoring
	# service. Publishing runs asynchronously on a thread pool so that
	# dashboard latency never blocks the job cycle.
	alias_list = ['dashboard']
	config_section_list = LocalEventHandler.config_section_list + ['dashboard']

	def __init__(self, config, name, task):
		LocalEventHandler.__init__(self, config, name, task)
		# Fallback application name, used when the job config does not
		# provide SCRAM_PROJECTVERSION (see on_job_submit)
		self._app = config.get('application', 'shellscript', on_change=None)
		# Maximum time (seconds) to wait for pending notifications at shutdown
		self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
		self._tasktype = config.get('task', 'analysis', on_change=None)
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
		# Translation of grid-control job states to dashboard status strings;
		# any state not listed here is reported as 'PENDING' (see _update_dashboard)
		self._map_status_job2dashboard = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()

	def on_job_output(self, wms, job_obj, jobnum, exit_code):
		# Report the executable exit code once job output has been retrieved.
		# NOTE(review): job_obj is passed as the 'data' argument as well - this
		# relies on Job exposing a dict-like get() (used in _update_dashboard);
		# confirm against the Job class.
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {'ExeExitCode': exit_code})

	def on_job_submit(self, wms, job_obj, jobnum):
		# Called on job submission
		token = wms.get_access_token(job_obj.gc_id)
		job_config_dict = self._task.get_job_dict(jobnum)
		self._start_publish(job_obj, jobnum, 'submission', [{'user': get_local_username(),
			'GridName': '/CN=%s' % token.get_user_name(), 'CMSUser': token.get_user_name(),
			'tool': 'grid-control', 'JSToolVersion': get_version(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
			'exe': job_config_dict.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.get_object_name(), 'vo': token.get_group(),
			'nevtJob': job_config_dict.get('MAX_EVENTS', 0),
			'datasetFull': job_config_dict.get('DATASETPATH', 'none')}])

	def on_job_update(self, wms, job_obj, jobnum, data):
		# Forward plain status changes; job_obj doubles as the 'data' source here
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

	def on_workflow_finish(self):
		# Give pending notification threads a bounded time to finish at shutdown
		self._tp.wait_and_drop(self._dashboard_timeout)

	def _publish(self, job_obj, jobnum, task_id, usermsg):
		# Runs on a worker thread (see _start_publish). gc_id has the form
		# '<prefix>.<backend>.<wms_id>' - split off the first two components.
		(_, backend, wms_id) = job_obj.gc_id.split('.', 2)
		dash_id = '%s_%s' % (jobnum, wms_id)
		if 'http' not in job_obj.gc_id:
			# Synthesize an https-style identifier for backends without one
			dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
		# Merge the base identifiers with the caller-supplied message dicts;
		# entries with falsy values are dropped before publishing
		msg = dict_union({'taskId': task_id, 'jobId': dash_id, 'sid': wms_id}, *usermsg)
		DashboardAPI(task_id, dash_id).publish(**filter_dict(msg, value_filter=identity))

	def _start_publish(self, job_obj, jobnum, desc, msg):
		# Resolve the configured task name template into a concrete dashboard
		# task id (empty DATASETNICK, trailing/leading underscores stripped)
		task_id = self._task.substitute_variables('dashboard task id', self._taskname, jobnum,
			additional_var_dict={'DATASETNICK': ''}).strip('_')
		self._tp.start_daemon('Notifying dashboard about %s of job %d' % (desc, jobnum),
			self._publish, job_obj, jobnum, task_id, msg)

	def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
		# Called on job status update and output
		# Translate status into dashboard status message
		status_dashboard = self._map_status_job2dashboard.get(job_obj.state, 'PENDING')
		self._start_publish(job_obj, jobnum, 'status', [{'StatusValue': status_dashboard,
			'StatusValueReason': data.get('reason', status_dashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': job_obj.get_job_location()}, add_dict])
Пример #2
0
def process_all(opts, args):
    """Process every selected job of the task script given in args[0].

    Re-creates the script object, access token and job database on every
    call so that on-disk changes are picked up between invocations. Jobs
    are processed either on a thread pool (opts.threads) or sequentially
    with a progress display. Returns status_mon.is_finished().
    """
    # Init everything in each loop to pick up changes
    script_obj = get_script_object(args[0],
                                   opts.job_selector,
                                   only_success=False)
    token = AccessToken.create_instance(opts.token, script_obj.new_config,
                                        'token')
    work_dn = script_obj.config.get_work_path()
    # Install the error archive handler only once per process
    # (process_all.first is a function attribute initialized elsewhere)
    if process_all.first:
        logging.getLogger().addHandler(
            ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
        process_all.first = False

    # Create SE output dir - default next to the workdir, and normalize
    # plain paths into file:// URLs
    if not opts.output:
        opts.output = os.path.join(work_dn, 'se_output')
    if '://' not in opts.output:
        opts.output = 'file:///%s' % os.path.abspath(opts.output)

    job_db = script_obj.job_db
    jobnum_list = job_db.get_job_list()
    status_mon = StatusMonitor(len(jobnum_list))
    if opts.shuffle:
        random.shuffle(jobnum_list)
    else:
        jobnum_list.sort()

    if opts.threads:
        activity = Activity('Processing jobs')
        pool = GCThreadPool(opts.threads)
        for jobnum in jobnum_list:
            pool.start_daemon('Processing job %d' % jobnum, process_job, opts,
                              work_dn, status_mon, job_db, token, jobnum)
        pool.wait_and_drop()
        activity.finish()
    else:
        # max() raises ValueError on an empty sequence - guard against a
        # job selector that matches no jobs
        progress_max = (max(jobnum_list) + 1) if jobnum_list else 0
        progress = ProgressActivity('Processing job', progress_max)
        for jobnum in jobnum_list:
            progress.update_progress(jobnum)
            process_job(opts, work_dn, status_mon, job_db, token, jobnum)
        progress.finish()

    # Print overview
    if not opts.hide_results:
        status_mon.show_results()
    return status_mon.is_finished()
def process_all(opts, args):
	"""Process every selected job of the task script given in args[0].

	Re-creates the script object, access token and job database on every
	call so that on-disk changes are picked up between invocations. Jobs
	are processed either on a thread pool (opts.threads) or sequentially
	with a progress display. Returns status_mon.is_finished().
	"""
	# Init everything in each loop to pick up changes
	script_obj = get_script_object(args[0], opts.job_selector, only_success=False)
	token = AccessToken.create_instance(opts.token, script_obj.new_config, 'token')
	work_dn = script_obj.config.get_work_path()
	# Install the error archive handler only once per process
	# (process_all.first is a function attribute initialized elsewhere)
	if process_all.first:
		logging.getLogger().addHandler(ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
		process_all.first = False

	# Create SE output dir - default next to the workdir, and normalize
	# plain paths into file:// URLs
	if not opts.output:
		opts.output = os.path.join(work_dn, 'se_output')
	if '://' not in opts.output:
		opts.output = 'file:///%s' % os.path.abspath(opts.output)

	job_db = script_obj.job_db
	jobnum_list = job_db.get_job_list()
	status_mon = StatusMonitor(len(jobnum_list))
	if opts.shuffle:
		random.shuffle(jobnum_list)
	else:
		jobnum_list.sort()

	if opts.threads:
		activity = Activity('Processing jobs')
		pool = GCThreadPool(opts.threads)
		for jobnum in jobnum_list:
			pool.start_daemon('Processing job %d' % jobnum, process_job,
				opts, work_dn, status_mon, job_db, token, jobnum)
		pool.wait_and_drop()
		activity.finish()
	else:
		# max() raises ValueError on an empty sequence - guard against a
		# job selector that matches no jobs
		progress_max = (max(jobnum_list) + 1) if jobnum_list else 0
		progress = ProgressActivity('Processing job', progress_max)
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			process_job(opts, work_dn, status_mon, job_db, token, jobnum)
		progress.finish()

	# Print overview
	if not opts.hide_results:
		status_mon.show_results()
	return status_mon.is_finished()
Пример #4
0
class ScriptEventHandler(LocalEventHandler):
	# Event handler that runs user-configured shell scripts on job lifecycle
	# events (submit / status update / output / task finish). Scripts run on
	# a thread pool so they never block the main job cycle.
	alias_list = ['scripts']
	config_section_list = LocalEventHandler.config_section_list + ['scripts']

	def __init__(self, config, name):
		LocalEventHandler.__init__(self, config, name)
		# If silent, script output is not captured/logged (see _script_thread)
		self._silent = config.get_bool('silent', True, on_change=None)
		# Empty command string means "no script configured" for that event
		self._script_submit = config.get_command('on submit', '', on_change=None)
		self._script_status = config.get_command('on status', '', on_change=None)
		self._script_output = config.get_command('on output', '', on_change=None)
		self._script_finish = config.get_command('on finish', '', on_change=None)
		# Timeout for a single script run and for draining the pool at shutdown
		self._script_timeout = config.get_time('script timeout', 20, on_change=None)
		self._path_work = config.get_work_path()
		self._tp = GCThreadPool()

	def on_job_output(self, task, wms, job_obj, jobnum, exit_code):
		# Called on job status update
		self._run_in_background(self._script_output, task, jobnum, job_obj, {'RETCODE': exit_code})

	def on_job_submit(self, task, wms, job_obj, jobnum):
		# Called on job submission
		self._run_in_background(self._script_submit, task, jobnum, job_obj)

	def on_job_update(self, task, wms, job_obj, jobnum, data):
		# Called on job status update
		self._run_in_background(self._script_status, task, jobnum, job_obj)

	def on_task_finish(self, task, job_len):
		# Called at the end of the task
		self._run_in_background(self._script_finish, task,
			jobnum=0, additional_var_dict={'NJOBS': job_len})

	def on_workflow_finish(self):
		# Give running script threads a bounded time to finish at shutdown
		self._tp.wait_and_drop(self._script_timeout)

	def _run_in_background(self, script, task, jobnum=None, job_obj=None, additional_var_dict=None):
		# Dispatch the script to the thread pool; no-op if unconfigured
		if script != '':
			self._tp.start_daemon('Running event handler script %s' % script,
				self._script_thread, script, task, jobnum, job_obj, additional_var_dict)

	def _script_thread(self, script, task, jobnum=None, job_obj=None, add_dict=None):
		# Get both task and job config / state dicts
		# Runs on a worker thread. Build-up order matters: job state dict
		# (keys uppercased) -> GC_WORKDIR -> task job dict -> add_dict,
		# with later sources overriding earlier ones.
		try:
			tmp = {}
			if job_obj is not None:
				for key, value in job_obj.get_dict().items():
					tmp[key.upper()] = value
			tmp['GC_WORKDIR'] = self._path_work
			if jobnum is not None:
				tmp.update(task.get_job_dict(jobnum))
			tmp.update(add_dict or {})
			# Export everything to the script environment under a GC_ prefix
			env = dict(os.environ)
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				env[key] = str(value)

			# Substitute @VAR@-style placeholders in the configured command line
			script = task.substitute_variables('monitoring script', script, jobnum, tmp)
			if not self._silent:
				# Capture and log script output, enforcing the configured timeout
				proc = LocalProcess(*shlex.split(script), **{'env_dict': env})
				proc_output = proc.get_output(timeout=self._script_timeout)
				if proc_output.strip():
					self._log.info(proc_output.strip())
			else:
				# NOTE(review): os.system runs the command through the shell
				# without the GC_* env and without escaping substituted values -
				# potential shell-injection risk if config values are untrusted;
				# it also ignores the script timeout. Confirm this is intended.
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script')
			clear_current_exception()
Пример #5
0
class DashboardLocal(LocalEventHandler):
    """Publish job lifecycle events to the CMS dashboard monitoring service.

    All publishing is dispatched to a thread pool so that slow dashboard
    responses never block the main job cycle.
    """
    alias_list = ['dashboard']
    config_section_list = LocalEventHandler.config_section_list + ['dashboard']

    def __init__(self, config, name, task):
        LocalEventHandler.__init__(self, config, name, task)
        self._app = config.get('application', 'shellscript', on_change=None)
        self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
        self._tasktype = config.get('task', 'analysis', on_change=None)
        self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
        # Job state -> dashboard status string; unknown states map to 'PENDING'
        self._map_status_job2dashboard = {
            Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
            Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED',
        }
        self._tp = GCThreadPool()

    def on_job_output(self, wms, job_obj, jobnum, exit_code):
        # Report the executable exit code once job output has been retrieved
        self._update_dashboard(wms, job_obj, jobnum, job_obj, {'ExeExitCode': exit_code})

    def on_job_submit(self, wms, job_obj, jobnum):
        # Called on job submission
        token = wms.get_access_token(job_obj.gc_id)
        job_config_dict = self._task.get_job_dict(jobnum)
        submit_info = {
            'user': get_local_username(),
            'GridName': '/CN=%s' % token.get_user_name(),
            'CMSUser': token.get_user_name(),
            'tool': 'grid-control',
            'JSToolVersion': get_version(),
            'SubmissionType': 'direct',
            'tool_ui': os.environ.get('HOSTNAME', ''),
            'application': job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
            'exe': job_config_dict.get('CMSSW_EXEC', 'shellscript'),
            'taskType': self._tasktype,
            'scheduler': wms.get_object_name(),
            'vo': token.get_group(),
            'nevtJob': job_config_dict.get('MAX_EVENTS', 0),
            'datasetFull': job_config_dict.get('DATASETPATH', 'none'),
        }
        self._start_publish(job_obj, jobnum, 'submission', [submit_info])

    def on_job_update(self, wms, job_obj, jobnum, data):
        # Forward plain status changes; job_obj doubles as the 'data' source
        self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

    def on_workflow_finish(self):
        # Give pending notification threads a bounded time to finish
        self._tp.wait_and_drop(self._dashboard_timeout)

    def _publish(self, job_obj, jobnum, task_id, usermsg):
        # Worker-thread body: derive the dashboard job id from the gc_id
        # ('<prefix>.<backend>.<wms_id>') and push the merged message
        (_, backend, wms_id) = job_obj.gc_id.split('.', 2)
        if 'http' in job_obj.gc_id:
            dash_id = '%s_%s' % (jobnum, wms_id)
        else:
            # Synthesize an https-style identifier for backends without one
            dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
        msg = dict_union({'taskId': task_id, 'jobId': dash_id, 'sid': wms_id}, *usermsg)
        # Drop entries with falsy values before publishing
        DashboardAPI(task_id, dash_id).publish(**filter_dict(msg, value_filter=identity))

    def _start_publish(self, job_obj, jobnum, desc, msg):
        # Resolve the configured task name template into a dashboard task id
        task_id = self._task.substitute_variables('dashboard task id', self._taskname, jobnum,
            additional_var_dict={'DATASETNICK': ''}).strip('_')
        self._tp.start_daemon('Notifying dashboard about %s of job %d' % (desc, jobnum),
            self._publish, job_obj, jobnum, task_id, msg)

    def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
        # Called on job status update and output
        # Translate status into dashboard status message
        status_dashboard = self._map_status_job2dashboard.get(job_obj.state, 'PENDING')
        status_info = {
            'StatusValue': status_dashboard,
            'StatusValueReason': data.get('reason', status_dashboard).upper(),
            'StatusEnterTime': data.get('timestamp',
                time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
            'StatusDestination': job_obj.get_job_location(),
        }
        self._start_publish(job_obj, jobnum, 'status', [status_info, add_dict])