class DashboardLocal(LocalEventHandler):
	"""Local event handler that reports job lifecycle events to the CMS dashboard."""
	alias_list = ['dashboard']
	config_section_list = LocalEventHandler.config_section_list + ['dashboard']

	def __init__(self, config, name, task):
		LocalEventHandler.__init__(self, config, name, task)
		# Fallback application name when the job config has no SCRAM_PROJECTVERSION
		self._app = config.get('application', 'shellscript', on_change=None)
		# Maximum time to wait for pending notification threads on shutdown
		self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
		self._tasktype = config.get('task', 'analysis', on_change=None)
		# Template for the dashboard task id - variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
		# Job state -> dashboard status string; unmapped states are reported as 'PENDING'
		self._map_status_job2dashboard = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()

	def on_job_output(self, wms, job_obj, jobnum, exit_code):
		# Called when job output is retrieved - report the executable exit code
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {'ExeExitCode': exit_code})

	def on_job_submit(self, wms, job_obj, jobnum):
		# Called on job submission
		token = wms.get_access_token(job_obj.gc_id)
		job_config_dict = self._task.get_job_dict(jobnum)
		self._start_publish(job_obj, jobnum, 'submission', [{'user': get_local_username(),
			'GridName': '/CN=%s' % token.get_user_name(), 'CMSUser': token.get_user_name(),
			'tool': 'grid-control', 'JSToolVersion': get_version(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
			'exe': job_config_dict.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.get_object_name(), 'vo': token.get_group(),
			'nevtJob': job_config_dict.get('MAX_EVENTS', 0),
			'datasetFull': job_config_dict.get('DATASETPATH', 'none')}])

	def on_job_update(self, wms, job_obj, jobnum, data):
		# Called on job status update - the job object itself is passed as message source
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

	def on_workflow_finish(self):
		# Give pending notification threads a chance to finish before shutdown
		self._tp.wait_and_drop(self._dashboard_timeout)

	def _publish(self, job_obj, jobnum, task_id, usermsg):
		# Send a single dashboard message (runs in a background thread)
		(_, backend, wms_id) = job_obj.gc_id.split('.', 2)
		dash_id = '%s_%s' % (jobnum, wms_id)
		if 'http' not in job_obj.gc_id:
			# Synthesize an https-style id for backends without an http(s) job id
			dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
		msg = dict_union({'taskId': task_id, 'jobId': dash_id, 'sid': wms_id}, *usermsg)
		# identity as value filter - presumably drops empty/None entries; verify filter_dict semantics
		DashboardAPI(task_id, dash_id).publish(**filter_dict(msg, value_filter=identity))

	def _start_publish(self, job_obj, jobnum, desc, msg):
		# Compute the dashboard task id and publish asynchronously via the thread pool
		task_id = self._task.substitute_variables('dashboard task id', self._taskname, jobnum,
			additional_var_dict={'DATASETNICK': ''}).strip('_')
		self._tp.start_daemon('Notifying dashboard about %s of job %d' % (desc, jobnum),
			self._publish, job_obj, jobnum, task_id, msg)

	def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
		# Called on job status update and output
		# Translate status into dashboard status message
		status_dashboard = self._map_status_job2dashboard.get(job_obj.state, 'PENDING')
		self._start_publish(job_obj, jobnum, 'status', [{'StatusValue': status_dashboard,
			'StatusValueReason': data.get('reason', status_dashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': job_obj.get_job_location()}, add_dict])
def __init__(self, config, name):
	"""Read the event handler script settings and prepare the worker thread pool."""
	LocalEventHandler.__init__(self, config, name)
	# Suppress script output unless explicitly disabled
	self._silent = config.get_bool('silent', True, on_change=None)

	def _get_hook(option):
		# Optional hook command; an empty string means 'hook not configured'
		return config.get_command(option, '', on_change=None)
	self._script_submit = _get_hook('on submit')
	self._script_status = _get_hook('on status')
	self._script_output = _get_hook('on output')
	self._script_finish = _get_hook('on finish')
	# Upper bound for script runtime / pool shutdown wait
	self._script_timeout = config.get_time('script timeout', 20, on_change=None)
	self._path_work = config.get_work_path()
	self._tp = GCThreadPool()
def __init__(self, config, name, task):
	"""Collect the monitoring script options and create the background thread pool."""
	Monitoring.__init__(self, config, name, task)
	self._silent = config.getBool('silent', True, onChange=None)
	# One optional command per monitored job lifecycle event
	for attr_name, option in (('_evtSubmit', 'on submit'), ('_evtStatus', 'on status'),
			('_evtOutput', 'on output'), ('_evtFinish', 'on finish')):
		setattr(self, attr_name, config.getCommand(option, '', onChange=None))
	self._runningMax = config.getTime('script runtime', 5, onChange=None)
	self._workPath = config.getWorkPath()
	self._tp = GCThreadPool()
def __init__(self, config, name, task):
	"""Read dashboard reporting options and build the job-state translation table."""
	LocalEventHandler.__init__(self, config, name, task)
	self._app = config.get('application', 'shellscript', on_change=None)
	self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
	self._tasktype = config.get('task', 'analysis', on_change=None)
	self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
	# All terminal 'done-like' states map to the same dashboard status
	status_map = dict.fromkeys((Job.DONE, Job.FAILED, Job.SUCCESS), 'DONE')
	status_map.update({Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'})
	self._map_status_job2dashboard = status_map
	self._tp = GCThreadPool()
def __init__(self, config, name, task):
	"""Set up dashboard configuration values and the status translation map."""
	Monitoring.__init__(self, config, name, task)
	job_desc = task.getDescription(None)  # TODO: use the other variables for monitoring
	self._app = config.get('application', 'shellscript', onChange = None)
	self._tasktype = config.get('task', job_desc.jobType or 'analysis', onChange = None)
	self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange = None)
	# Several terminal states all report as 'DONE' on the dashboard
	status_map = {Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
	for done_state in (Job.DONE, Job.FAILED, Job.SUCCESS):
		status_map[done_state] = 'DONE'
	self._statusMap = status_map
	self._tp = GCThreadPool()
def __init__(self, config, name, task):
	"""Initialize monitoring script configuration (silent mode, hooks, runtime limit)."""
	Monitoring.__init__(self, config, name, task)
	self._silent = config.getBool('silent', True, onChange = None)

	def _event_command(option):
		# Hook command for one event; empty string disables the hook
		return config.getCommand(option, '', onChange = None)
	self._evtSubmit = _event_command('on submit')
	self._evtStatus = _event_command('on status')
	self._evtOutput = _event_command('on output')
	self._evtFinish = _event_command('on finish')
	self._runningMax = config.getTime('script runtime', 5, onChange = None)
	self._workPath = config.getWorkPath()
	self._tp = GCThreadPool()
def __init__(self, config, name, task):
	"""Configure dashboard reporting and prepare the notification thread pool."""
	LocalEventHandler.__init__(self, config, name, task)
	self._app = config.get('application', 'shellscript', on_change=None)
	self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
	self._tasktype = config.get('task', 'analysis', on_change=None)
	self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
	# Build the job-state -> dashboard-status table; DONE/FAILED/SUCCESS collapse to 'DONE'
	done_states = (Job.DONE, Job.FAILED, Job.SUCCESS)
	self._map_status_job2dashboard = dict([(state, 'DONE') for state in done_states] +
		[(Job.RUNNING, 'RUNNING'), (Job.ABORTED, 'ABORTED'), (Job.CANCELLED, 'CANCELLED')])
	self._tp = GCThreadPool()
def process_all(opts, args):
	# Process all selected jobs of a task; returns True once the status monitor reports done
	# Init everything in each loop to pick up changes
	script_obj = get_script_object(args[0], opts.job_selector, only_success=False)
	token = AccessToken.create_instance(opts.token, script_obj.new_config, 'token')
	work_dn = script_obj.config.get_work_path()
	# 'first' is a function attribute used as a one-shot flag across repeated calls
	if process_all.first:
		logging.getLogger().addHandler(ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
		process_all.first = False
	# Create SE output dir
	if not opts.output:
		opts.output = os.path.join(work_dn, 'se_output')
	if '://' not in opts.output:
		# Interpret plain paths as local file URLs
		opts.output = 'file:///%s' % os.path.abspath(opts.output)
	job_db = script_obj.job_db
	jobnum_list = job_db.get_job_list()
	status_mon = StatusMonitor(len(jobnum_list))
	if opts.shuffle:
		random.shuffle(jobnum_list)
	else:
		jobnum_list.sort()
	if opts.threads:
		# Parallel processing via a daemon thread pool
		activity = Activity('Processing jobs')
		pool = GCThreadPool(opts.threads)
		for jobnum in jobnum_list:
			pool.start_daemon('Processing job %d' % jobnum, process_job,
				opts, work_dn, status_mon, job_db, token, jobnum)
		pool.wait_and_drop()
		activity.finish()
	else:
		# Sequential processing with a progress indicator
		# NOTE(review): max() raises on an empty job list - confirm the selector always yields jobs
		progress = ProgressActivity('Processing job', max(jobnum_list) + 1)
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			process_job(opts, work_dn, status_mon, job_db, token, jobnum)
		progress.finish()
	# Print overview
	if not opts.hide_results:
		status_mon.show_results()
	return status_mon.is_finished()
def __init__(self, config, name, task):
	"""Read dashboard options from the config and build the status translation map."""
	Monitoring.__init__(self, config, name, task)
	job_description = task.getDescription(None)  # TODO: use the other variables for monitoring
	self._app = config.get('application', 'shellscript', onChange=None)
	self._tasktype = config.get('task', job_description.jobType or 'analysis', onChange=None)
	self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange=None)
	# DONE/FAILED/SUCCESS all collapse to the dashboard status 'DONE'
	status_map = dict.fromkeys((Job.DONE, Job.FAILED, Job.SUCCESS), 'DONE')
	status_map[Job.RUNNING] = 'RUNNING'
	status_map[Job.ABORTED] = 'ABORTED'
	status_map[Job.CANCELLED] = 'CANCELLED'
	self._statusMap = status_map
	self._tp = GCThreadPool()
def process_all(opts, args):
	# Process all selected jobs of a task; returns True once the status monitor reports done
	# Init everything in each loop to pick up changes
	script_obj = get_script_object(args[0], opts.job_selector, only_success=False)
	token = AccessToken.create_instance(opts.token, script_obj.new_config, 'token')
	work_dn = script_obj.config.get_work_path()
	# 'first' is a function attribute used as a one-shot flag across repeated calls
	if process_all.first:
		logging.getLogger().addHandler(
			ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
		process_all.first = False
	# Create SE output dir
	if not opts.output:
		opts.output = os.path.join(work_dn, 'se_output')
	if '://' not in opts.output:
		# Interpret plain paths as local file URLs
		opts.output = 'file:///%s' % os.path.abspath(opts.output)
	job_db = script_obj.job_db
	jobnum_list = job_db.get_job_list()
	status_mon = StatusMonitor(len(jobnum_list))
	if opts.shuffle:
		random.shuffle(jobnum_list)
	else:
		jobnum_list.sort()
	if opts.threads:
		# Parallel processing via a daemon thread pool
		activity = Activity('Processing jobs')
		pool = GCThreadPool(opts.threads)
		for jobnum in jobnum_list:
			pool.start_daemon('Processing job %d' % jobnum, process_job,
				opts, work_dn, status_mon, job_db, token, jobnum)
		pool.wait_and_drop()
		activity.finish()
	else:
		# Sequential processing with a progress indicator
		# NOTE(review): max() raises on an empty job list - confirm the selector always yields jobs
		progress = ProgressActivity('Processing job', max(jobnum_list) + 1)
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			process_job(opts, work_dn, status_mon, job_db, token, jobnum)
		progress.finish()
	# Print overview
	if not opts.hide_results:
		status_mon.show_results()
	return status_mon.is_finished()
class ScriptEventHandler(LocalEventHandler):
	"""Local event handler that runs user-configured scripts on job lifecycle events."""
	alias_list = ['scripts']
	config_section_list = LocalEventHandler.config_section_list + ['scripts']

	def __init__(self, config, name):
		LocalEventHandler.__init__(self, config, name)
		# Suppress script output unless explicitly disabled
		self._silent = config.get_bool('silent', True, on_change=None)
		# One optional hook command per event; empty string means 'not configured'
		self._script_submit = config.get_command('on submit', '', on_change=None)
		self._script_status = config.get_command('on status', '', on_change=None)
		self._script_output = config.get_command('on output', '', on_change=None)
		self._script_finish = config.get_command('on finish', '', on_change=None)
		# Upper bound for script runtime / pool shutdown wait
		self._script_timeout = config.get_time('script timeout', 20, on_change=None)
		self._path_work = config.get_work_path()
		self._tp = GCThreadPool()

	def on_job_output(self, task, wms, job_obj, jobnum, exit_code):
		# Called on job status update
		self._run_in_background(self._script_output, task, jobnum, job_obj, {'RETCODE': exit_code})

	def on_job_submit(self, task, wms, job_obj, jobnum):
		# Called on job submission
		self._run_in_background(self._script_submit, task, jobnum, job_obj)

	def on_job_update(self, task, wms, job_obj, jobnum, data):
		# Called on job status update
		self._run_in_background(self._script_status, task, jobnum, job_obj)

	def on_task_finish(self, task, job_len):
		# Called at the end of the task
		self._run_in_background(self._script_finish, task, jobnum=0,
			additional_var_dict={'NJOBS': job_len})

	def on_workflow_finish(self):
		# Wait for still-running hook scripts before shutdown
		self._tp.wait_and_drop(self._script_timeout)

	def _run_in_background(self, script, task, jobnum=None, job_obj=None, additional_var_dict=None):
		# Launch the hook script in a daemon thread; empty command is a no-op
		if script != '':
			self._tp.start_daemon('Running event handler script %s' % script,
				self._script_thread, script, task, jobnum, job_obj, additional_var_dict)

	def _script_thread(self, script, task, jobnum=None, job_obj=None, add_dict=None):
		# Get both task and job config / state dicts
		try:
			tmp = {}
			if job_obj is not None:
				# Export job state entries with upper-case keys
				for key, value in job_obj.get_dict().items():
					tmp[key.upper()] = value
			tmp['GC_WORKDIR'] = self._path_work
			if jobnum is not None:
				tmp.update(task.get_job_dict(jobnum))
			tmp.update(add_dict or {})
			# Build a private environment copy with a GC_ prefix on all exported keys
			env = dict(os.environ)
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				env[key] = str(value)
			script = task.substitute_variables('monitoring script', script, jobnum, tmp)
			if not self._silent:
				# Capture and log the script output
				proc = LocalProcess(*shlex.split(script), **{'env_dict': env})
				proc_output = proc.get_output(timeout=self._script_timeout)
				if proc_output.strip():
					self._log.info(proc_output.strip())
			else:
				# Silent mode: fire-and-forget via the shell, output is discarded
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script')
			clear_current_exception()
class ScriptMonitoring(Monitoring):
	"""Monitoring plugin that runs user-defined scripts on job lifecycle events."""
	alias = ['scripts']
	configSections = EventHandler.configSections + ['scripts']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		# Suppress script output unless explicitly disabled
		self._silent = config.getBool('silent', True, onChange = None)
		# One optional hook command per event; empty string means 'not configured'
		self._evtSubmit = config.getCommand('on submit', '', onChange = None)
		self._evtStatus = config.getCommand('on status', '', onChange = None)
		self._evtOutput = config.getCommand('on output', '', onChange = None)
		self._evtFinish = config.getCommand('on finish', '', onChange = None)
		# Upper bound for script runtime / pool shutdown wait
		self._runningMax = config.getTime('script runtime', 5, onChange = None)
		self._workPath = config.getWorkPath()
		self._tp = GCThreadPool()

	# Get both task and job config / state dicts
	def _scriptThread(self, script, jobNum = None, jobObj = None, allDict = None):
		try:
			tmp = {}
			if jobNum is not None:
				tmp.update(self._task.getSubmitInfo(jobNum))
			if jobObj is not None:
				tmp.update(jobObj.getAll())
			tmp['WORKDIR'] = self._workPath
			tmp.update(self._task.getTaskConfig())
			if jobNum is not None:
				tmp.update(self._task.getJobConfig(jobNum))
				tmp.update(self._task.getSubmitInfo(jobNum))
			tmp.update(allDict or {})
			# Export collected variables with a GC_ prefix
			# NOTE(review): mutates os.environ from a background thread - racy when several
			# scripts run concurrently; confirm this is acceptable here
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				os.environ[key] = str(value)
			script = self._task.substVars(script, jobNum, tmp)
			if not self._silent:
				# Capture and log the script output
				proc = LocalProcess(script)
				self._log.info(proc.get_output(timeout = self._runningMax))
			else:
				# Silent mode: fire-and-forget via the shell
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script!')

	def _runInBackground(self, script, jobNum = None, jobObj = None, addDict = None):
		# Launch the hook script in a pool thread; empty command is a no-op
		if script != '':
			self._tp.start_thread('Running monitoring script %s' % script,
				self._scriptThread, script, jobNum, jobObj, addDict)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		self._runInBackground(self._evtSubmit, jobNum, jobObj)

	# Called on job status update
	def onJobUpdate(self, wms, jobObj, jobNum, data):
		self._runInBackground(self._evtStatus, jobNum, jobObj, {'STATUS': Job.enum2str(jobObj.state)})

	# Called on job status update
	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._runInBackground(self._evtOutput, jobNum, jobObj, {'RETCODE': retCode})

	# Called at the end of the task
	def onTaskFinish(self, nJobs):
		self._runInBackground(self._evtFinish, addDict = {'NJOBS': nJobs})
		self._tp.wait_and_drop(self._runningMax)
class DashBoard(Monitoring):
	"""Monitoring plugin that publishes job status information to the CMS dashboard."""
	configSections = Monitoring.configSections + ['dashboard']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		jobDesc = task.getDescription(None)  # TODO: use the other variables for monitoring
		# Fallback application name when the job config has no SCRAM_PROJECTVERSION
		self._app = config.get('application', 'shellscript', onChange=None)
		# Maximum time to wait for pending notification threads on shutdown
		self._runningMax = config.getTime('dashboard timeout', 5, onChange=None)
		self._tasktype = config.get('task', jobDesc.jobType or 'analysis', onChange=None)
		# Template for the dashboard task id - variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange=None)
		# Job state -> dashboard status; unmapped states report as 'PENDING'
		self._statusMap = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()

	def getScript(self):
		# Monitoring script shipped with the job
		yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')

	def getTaskConfig(self):
		# Variables made available to the worker node monitoring script
		result = {'TASK_NAME': self._taskname, 'DB_EXEC': self._app, 'DATASETNICK': ''}
		result.update(Monitoring.getTaskConfig(self))
		return result

	def getFiles(self):
		# All files that have to be included in the job sandbox
		yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')
		for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
			yield pathShare('..', 'DashboardAPI', fn, pkg='grid_control_cms')

	def _publish(self, jobObj, jobNum, taskId, usermsg):
		# Send a single dashboard message (runs in a background thread)
		(_, backend, rawId) = jobObj.gcID.split('.', 2)
		dashId = '%s_%s' % (jobNum, rawId)
		if 'http' not in jobObj.gcID:
			# Synthesize an https-style id for backends without an http(s) job id
			dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
		msg = mergeDicts([{'taskId': taskId, 'jobId': dashId, 'sid': rawId}] + usermsg)
		# Entries with value None are filtered out before publishing
		DashboardAPI(taskId, dashId).publish(**filterDict(msg, vF=lambda v: v is not None))

	def _start_publish(self, jobObj, jobNum, desc, message):
		# Compute the dashboard task id and publish asynchronously via the thread pool
		taskId = self._task.substVars('dashboard task id', self._taskname, jobNum,
			addDict={'DATASETNICK': ''}).strip('_')
		self._tp.start_thread('Notifying dashboard about %s of job %d' % (desc, jobNum),
			self._publish, jobObj, jobNum, taskId, message)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		token = wms.getAccessToken(jobObj.gcID)
		jobInfo = self._task.getJobConfig(jobNum)
		self._start_publish(jobObj, jobNum, 'submission', [{'user': os.environ['LOGNAME'],
			'GridName': '/CN=%s' % token.getUsername(), 'CMSUser': token.getUsername(),
			'tool': 'grid-control', 'JSToolVersion': getVersion(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
			'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.getObjectName(), 'vo': token.getGroup(),
			'nevtJob': jobInfo.get('MAX_EVENTS', 0),
			'datasetFull': jobInfo.get('DATASETPATH', 'none')}])

	# Called on job status update and output
	def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
		# Translate status into dashboard status message
		statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
		self._start_publish(jobObj, jobNum, 'status', [{'StatusValue': statusDashboard,
			'StatusValueReason': data.get('reason', statusDashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': data.get('dest', '')}, addMsg])

	def onJobUpdate(self, wms, jobObj, jobNum, data):
		# NOTE(review): passes jobObj (not data) as the message source - confirm intended
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {})

	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {'ExeExitCode': retCode})

	def onFinish(self):
		# Give pending notification threads a chance to finish before shutdown
		self._tp.wait_and_drop(self._runningMax)
class DashboardLocal(LocalEventHandler):
	"""Local event handler that reports job lifecycle events to the CMS dashboard."""
	alias_list = ['dashboard']
	config_section_list = LocalEventHandler.config_section_list + ['dashboard']

	def __init__(self, config, name, task):
		LocalEventHandler.__init__(self, config, name, task)
		# Fallback application name when the job config has no SCRAM_PROJECTVERSION
		self._app = config.get('application', 'shellscript', on_change=None)
		# Maximum time to wait for pending notification threads on shutdown
		self._dashboard_timeout = config.get_time('dashboard timeout', 5, on_change=None)
		self._tasktype = config.get('task', 'analysis', on_change=None)
		# Template for the dashboard task id - variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', on_change=None)
		# Job state -> dashboard status string; unmapped states are reported as 'PENDING'
		self._map_status_job2dashboard = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()

	def on_job_output(self, wms, job_obj, jobnum, exit_code):
		# Called when job output is retrieved - report the executable exit code
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {'ExeExitCode': exit_code})

	def on_job_submit(self, wms, job_obj, jobnum):
		# Called on job submission
		token = wms.get_access_token(job_obj.gc_id)
		job_config_dict = self._task.get_job_dict(jobnum)
		self._start_publish(job_obj, jobnum, 'submission', [{'user': get_local_username(),
			'GridName': '/CN=%s' % token.get_user_name(), 'CMSUser': token.get_user_name(),
			'tool': 'grid-control', 'JSToolVersion': get_version(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': job_config_dict.get('SCRAM_PROJECTVERSION', self._app),
			'exe': job_config_dict.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.get_object_name(), 'vo': token.get_group(),
			'nevtJob': job_config_dict.get('MAX_EVENTS', 0),
			'datasetFull': job_config_dict.get('DATASETPATH', 'none')}])

	def on_job_update(self, wms, job_obj, jobnum, data):
		# Called on job status update - the job object itself is passed as message source
		self._update_dashboard(wms, job_obj, jobnum, job_obj, {})

	def on_workflow_finish(self):
		# Give pending notification threads a chance to finish before shutdown
		self._tp.wait_and_drop(self._dashboard_timeout)

	def _publish(self, job_obj, jobnum, task_id, usermsg):
		# Send a single dashboard message (runs in a background thread)
		(_, backend, wms_id) = job_obj.gc_id.split('.', 2)
		dash_id = '%s_%s' % (jobnum, wms_id)
		if 'http' not in job_obj.gc_id:
			# Synthesize an https-style id for backends without an http(s) job id
			dash_id = '%s_https://%s:/%s' % (jobnum, backend, wms_id)
		msg = dict_union({'taskId': task_id, 'jobId': dash_id, 'sid': wms_id}, *usermsg)
		# identity as value filter - presumably drops empty/None entries; verify filter_dict semantics
		DashboardAPI(task_id, dash_id).publish(**filter_dict(msg, value_filter=identity))

	def _start_publish(self, job_obj, jobnum, desc, msg):
		# Compute the dashboard task id and publish asynchronously via the thread pool
		task_id = self._task.substitute_variables('dashboard task id', self._taskname, jobnum,
			additional_var_dict={'DATASETNICK': ''}).strip('_')
		self._tp.start_daemon('Notifying dashboard about %s of job %d' % (desc, jobnum),
			self._publish, job_obj, jobnum, task_id, msg)

	def _update_dashboard(self, wms, job_obj, jobnum, data, add_dict):
		# Called on job status update and output
		# Translate status into dashboard status message
		status_dashboard = self._map_status_job2dashboard.get(job_obj.state, 'PENDING')
		self._start_publish(job_obj, jobnum, 'status', [{'StatusValue': status_dashboard,
			'StatusValueReason': data.get('reason', status_dashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': job_obj.get_job_location()}, add_dict])
class ScriptMonitoring(Monitoring):
	"""Monitoring plugin that runs user-defined scripts on job lifecycle events."""
	alias = ['scripts']
	configSections = EventHandler.configSections + ['scripts']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		# Suppress script output unless explicitly disabled
		self._silent = config.getBool('silent', True, onChange=None)
		# One optional hook command per event; empty string means 'not configured'
		self._evtSubmit = config.getCommand('on submit', '', onChange=None)
		self._evtStatus = config.getCommand('on status', '', onChange=None)
		self._evtOutput = config.getCommand('on output', '', onChange=None)
		self._evtFinish = config.getCommand('on finish', '', onChange=None)
		# Upper bound for script runtime / pool shutdown wait
		self._runningMax = config.getTime('script runtime', 5, onChange=None)
		self._workPath = config.getWorkPath()
		self._tp = GCThreadPool()

	# Get both task and job config / state dicts
	def _scriptThread(self, script, jobNum=None, jobObj=None, allDict=None):
		try:
			tmp = {}
			if jobNum is not None:
				tmp.update(self._task.getSubmitInfo(jobNum))
			if jobObj is not None:
				tmp.update(jobObj.getAll())
			tmp['WORKDIR'] = self._workPath
			tmp.update(self._task.getTaskConfig())
			if jobNum is not None:
				tmp.update(self._task.getJobConfig(jobNum))
				tmp.update(self._task.getSubmitInfo(jobNum))
			tmp.update(allDict or {})
			# Export collected variables with a GC_ prefix
			# NOTE(review): mutates os.environ from a background thread - racy when several
			# scripts run concurrently; confirm this is acceptable here
			for key, value in tmp.items():
				if not key.startswith('GC_'):
					key = 'GC_' + key
				os.environ[key] = str(value)
			script = self._task.substVars(script, jobNum, tmp)
			if not self._silent:
				# Capture and log the script output
				proc = LocalProcess(script)
				self._log.info(proc.get_output(timeout=self._runningMax))
			else:
				# Silent mode: fire-and-forget via the shell
				os.system(script)
		except Exception:
			self._log.exception('Error while running user script!')

	def _runInBackground(self, script, jobNum=None, jobObj=None, addDict=None):
		# Launch the hook script in a pool thread; empty command is a no-op
		if script != '':
			self._tp.start_thread('Running monitoring script %s' % script,
				self._scriptThread, script, jobNum, jobObj, addDict)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		self._runInBackground(self._evtSubmit, jobNum, jobObj)

	# Called on job status update
	def onJobUpdate(self, wms, jobObj, jobNum, data):
		self._runInBackground(self._evtStatus, jobNum, jobObj, {'STATUS': Job.enum2str(jobObj.state)})

	# Called on job status update
	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._runInBackground(self._evtOutput, jobNum, jobObj, {'RETCODE': retCode})

	# Called at the end of the task
	def onTaskFinish(self, nJobs):
		self._runInBackground(self._evtFinish, addDict={'NJOBS': nJobs})
		self._tp.wait_and_drop(self._runningMax)
class DashBoard(Monitoring):
	"""Monitoring plugin that publishes job status information to the CMS dashboard."""
	configSections = Monitoring.configSections + ['dashboard']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		jobDesc = task.getDescription(None)  # TODO: use the other variables for monitoring
		# Fallback application name when the job config has no SCRAM_PROJECTVERSION
		self._app = config.get('application', 'shellscript', onChange = None)
		# Maximum time to wait for pending notification threads on shutdown
		self._runningMax = config.getTime('dashboard timeout', 5, onChange = None)
		self._tasktype = config.get('task', jobDesc.jobType or 'analysis', onChange = None)
		# Template for the dashboard task id - variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange = None)
		# Job state -> dashboard status; unmapped states report as 'PENDING'
		self._statusMap = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()

	def getScript(self):
		# Monitoring script shipped with the job
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')

	def getTaskConfig(self):
		# Variables made available to the worker node monitoring script
		result = {'TASK_NAME': self._taskname, 'DB_EXEC': self._app, 'DATASETNICK': ''}
		result.update(Monitoring.getTaskConfig(self))
		return result

	def getFiles(self):
		# All files that have to be included in the job sandbox
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')
		for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
			yield pathShare('..', 'DashboardAPI', fn, pkg = 'grid_control_cms')

	def _publish(self, jobObj, jobNum, taskId, usermsg):
		# Send a single dashboard message (runs in a background thread)
		(_, backend, rawId) = jobObj.gcID.split('.', 2)
		dashId = '%s_%s' % (jobNum, rawId)
		if 'http' not in jobObj.gcID:
			# Synthesize an https-style id for backends without an http(s) job id
			dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
		msg = mergeDicts([{'taskId': taskId, 'jobId': dashId, 'sid': rawId}] + usermsg)
		# Entries with value None are filtered out before publishing
		DashboardAPI(taskId, dashId).publish(**filterDict(msg, vF = lambda v: v is not None))

	def _start_publish(self, jobObj, jobNum, desc, message):
		# Compute the dashboard task id and publish asynchronously via the thread pool
		taskId = self._task.substVars('dashboard task id', self._taskname, jobNum,
			addDict = {'DATASETNICK': ''}).strip('_')
		self._tp.start_thread('Notifying dashboard about %s of job %d' % (desc, jobNum),
			self._publish, jobObj, jobNum, taskId, message)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		token = wms.getAccessToken(jobObj.gcID)
		jobInfo = self._task.getJobConfig(jobNum)
		self._start_publish(jobObj, jobNum, 'submission', [{'user': os.environ['LOGNAME'],
			'GridName': '/CN=%s' % token.getUsername(), 'CMSUser': token.getUsername(),
			'tool': 'grid-control', 'JSToolVersion': getVersion(),
			'SubmissionType':'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
			'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.getObjectName(), 'vo': token.getGroup(),
			'nevtJob': jobInfo.get('MAX_EVENTS', 0),
			'datasetFull': jobInfo.get('DATASETPATH', 'none')}])

	# Called on job status update and output
	def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
		# Translate status into dashboard status message
		statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
		self._start_publish(jobObj, jobNum, 'status', [{'StatusValue': statusDashboard,
			'StatusValueReason': data.get('reason', statusDashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': data.get('dest', '')}, addMsg])

	def onJobUpdate(self, wms, jobObj, jobNum, data):
		# NOTE(review): passes jobObj (not data) as the message source - confirm intended
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {})

	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {'ExeExitCode': retCode})

	def onFinish(self):
		# Give pending notification threads a chance to finish before shutdown
		self._tp.wait_and_drop(self._runningMax)