Пример #1
0
class ScriptMonitoring(Monitoring):
	# Monitoring plugin that runs user-configured commands on job / task events.
	# Every command is started in its own thread of a GCThreadPool.
	alias = ['scripts']
	configSections = EventHandler.configSections + ['scripts']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		# silent (default True): run the command via os.system without logging its output
		self._silent = config.getBool('silent', True, onChange = None)
		# One command per event; the empty default means "nothing to run"
		self._evtSubmit = config.getCommand('on submit', '', onChange = None)
		self._evtStatus = config.getCommand('on status', '', onChange = None)
		self._evtOutput = config.getCommand('on output', '', onChange = None)
		self._evtFinish = config.getCommand('on finish', '', onChange = None)
		# Used as timeout when collecting script output and when draining the pool
		self._runningMax = config.getTime('script runtime', 5, onChange = None)
		self._workPath = config.getWorkPath()
		self._tp = GCThreadPool()

	# Thread body: collect task / job variables, export them and run the script.
	# Later update() calls override earlier ones; allDict has the final word.
	# Any exception is logged and never leaves the thread.
	def _scriptThread(self, script, jobNum = None, jobObj = None, allDict = None):
		try:
			hasJob = jobNum is not None
			envVars = {}
			if hasJob:
				envVars.update(self._task.getSubmitInfo(jobNum))
			if jobObj is not None:
				envVars.update(jobObj.getAll())
			envVars['WORKDIR'] = self._workPath
			envVars.update(self._task.getTaskConfig())
			if hasJob:
				envVars.update(self._task.getJobConfig(jobNum))
				# Submit info is applied a second time so it wins over task / job config
				envVars.update(self._task.getSubmitInfo(jobNum))
			if allDict:
				envVars.update(allDict)

			# Export every variable with a 'GC_' prefix (added only when missing).
			# NOTE(review): os.environ is process-wide and this runs in pool threads,
			# so concurrently running scripts may observe each other's values - confirm.
			for name in envVars:
				envName = name if name.startswith('GC_') else 'GC_' + name
				os.environ[envName] = str(envVars[name])

			command = self._task.substVars(script, jobNum, envVars)
			if self._silent:
				os.system(command)
			else:
				proc = LocalProcess(command)
				self._log.info(proc.get_output(timeout = self._runningMax))
		except Exception:
			self._log.exception('Error while running user script!')

	# Start _scriptThread for the given script in the thread pool; empty scripts are skipped
	def _runInBackground(self, script, jobNum = None, jobObj = None, addDict = None):
		if script == '':
			return
		threadName = 'Running monitoring script %s' % script
		self._tp.start_thread(threadName, self._scriptThread, script, jobNum, jobObj, addDict)

	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		self._runInBackground(self._evtSubmit, jobNum, jobObj)

	# Called on job status update - the script additionally receives STATUS
	def onJobUpdate(self, wms, jobObj, jobNum, data):
		statusInfo = {'STATUS': Job.enum2str(jobObj.state)}
		self._runInBackground(self._evtStatus, jobNum, jobObj, statusInfo)

	# Called on job output - the script additionally receives RETCODE
	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		outputInfo = {'RETCODE': retCode}
		self._runInBackground(self._evtOutput, jobNum, jobObj, outputInfo)

	# Called at the end of the task - the script additionally receives NJOBS.
	# Afterwards the thread pool is drained via wait_and_drop(script runtime).
	def onTaskFinish(self, nJobs):
		self._runInBackground(self._evtFinish, addDict = {'NJOBS': nJobs})
		self._tp.wait_and_drop(self._runningMax)
Пример #2
0
class ScriptMonitoring(Monitoring):
    # Monitoring plugin that runs user-configured commands on job / task events.
    # Every command is started in its own thread of a GCThreadPool.
    alias = ['scripts']
    configSections = EventHandler.configSections + ['scripts']

    def __init__(self, config, name, task):
        Monitoring.__init__(self, config, name, task)
        # silent (default True): run the command via os.system without logging its output
        self._silent = config.getBool('silent', True, onChange=None)
        # One command per event; the empty default means "nothing to run"
        self._evtSubmit = config.getCommand('on submit', '', onChange=None)
        self._evtStatus = config.getCommand('on status', '', onChange=None)
        self._evtOutput = config.getCommand('on output', '', onChange=None)
        self._evtFinish = config.getCommand('on finish', '', onChange=None)
        # Used as timeout when collecting script output and when draining the pool
        self._runningMax = config.getTime('script runtime', 5, onChange=None)
        self._workPath = config.getWorkPath()
        self._tp = GCThreadPool()

    # Thread body: collect task / job variables, export them and run the script.
    # Later update() calls override earlier ones; allDict has the final word.
    # Any exception is logged and never leaves the thread.
    def _scriptThread(self, script, jobNum=None, jobObj=None, allDict=None):
        try:
            hasJob = jobNum is not None
            envVars = {}
            if hasJob:
                envVars.update(self._task.getSubmitInfo(jobNum))
            if jobObj is not None:
                envVars.update(jobObj.getAll())
            envVars['WORKDIR'] = self._workPath
            envVars.update(self._task.getTaskConfig())
            if hasJob:
                envVars.update(self._task.getJobConfig(jobNum))
                # Submit info is applied a second time so it wins over task / job config
                envVars.update(self._task.getSubmitInfo(jobNum))
            if allDict:
                envVars.update(allDict)

            # Export every variable with a 'GC_' prefix (added only when missing).
            # NOTE(review): os.environ is process-wide and this runs in pool threads,
            # so concurrently running scripts may observe each other's values - confirm.
            for name in envVars:
                envName = name if name.startswith('GC_') else 'GC_' + name
                os.environ[envName] = str(envVars[name])

            command = self._task.substVars(script, jobNum, envVars)
            if self._silent:
                os.system(command)
            else:
                proc = LocalProcess(command)
                self._log.info(proc.get_output(timeout=self._runningMax))
        except Exception:
            self._log.exception('Error while running user script!')

    # Start _scriptThread for the given script in the thread pool; empty scripts are skipped
    def _runInBackground(self, script, jobNum=None, jobObj=None, addDict=None):
        if script == '':
            return
        threadName = 'Running monitoring script %s' % script
        self._tp.start_thread(threadName, self._scriptThread, script, jobNum,
                              jobObj, addDict)

    # Called on job submission
    def onJobSubmit(self, wms, jobObj, jobNum):
        self._runInBackground(self._evtSubmit, jobNum, jobObj)

    # Called on job status update - the script additionally receives STATUS
    def onJobUpdate(self, wms, jobObj, jobNum, data):
        statusInfo = {'STATUS': Job.enum2str(jobObj.state)}
        self._runInBackground(self._evtStatus, jobNum, jobObj, statusInfo)

    # Called on job output - the script additionally receives RETCODE
    def onJobOutput(self, wms, jobObj, jobNum, retCode):
        outputInfo = {'RETCODE': retCode}
        self._runInBackground(self._evtOutput, jobNum, jobObj, outputInfo)

    # Called at the end of the task - the script additionally receives NJOBS.
    # Afterwards the thread pool is drained via wait_and_drop(script runtime).
    def onTaskFinish(self, nJobs):
        self._runInBackground(self._evtFinish, addDict={'NJOBS': nJobs})
        self._tp.wait_and_drop(self._runningMax)
Пример #3
0
class DashBoard(Monitoring):
    # Monitoring plugin that reports job submission and job status changes
    # through DashboardAPI. Every report is published from a GCThreadPool thread.
    configSections = Monitoring.configSections + ['dashboard']

    def __init__(self, config, name, task):
        Monitoring.__init__(self, config, name, task)
        jobDesc = task.getDescription(
            None)  # TODO: use the other variables for monitoring
        # Reported as 'application' when the job config has no SCRAM_PROJECTVERSION;
        # also exported to the job as DB_EXEC (see getTaskConfig)
        self._app = config.get('application', 'shellscript', onChange=None)
        # Passed to the thread pool's wait_and_drop() in onFinish
        self._runningMax = config.getTime('dashboard timeout',
                                          5,
                                          onChange=None)
        self._tasktype = config.get('task',
                                    jobDesc.jobType or 'analysis',
                                    onChange=None)
        # Template for the dashboard task id; variables are substituted per job
        self._taskname = config.get('task name',
                                    '@GC_TASK_ID@_@DATASETNICK@',
                                    onChange=None)
        # Job states without an entry here are reported as 'PENDING'
        self._statusMap = {
            Job.DONE: 'DONE',
            Job.FAILED: 'DONE',
            Job.SUCCESS: 'DONE',
            Job.RUNNING: 'RUNNING',
            Job.ABORTED: 'ABORTED',
            Job.CANCELLED: 'CANCELLED'
        }
        self._tp = GCThreadPool()

    # Yields the path of the dashboard monitoring shell script
    def getScript(self):
        yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')

    # Returns the task variables of this plugin; entries supplied by
    # Monitoring.getTaskConfig override the defaults set here
    def getTaskConfig(self):
        result = {
            'TASK_NAME': self._taskname,
            'DB_EXEC': self._app,
            'DATASETNICK': ''
        }
        result.update(Monitoring.getTaskConfig(self))
        return result

    # Yields the monitoring script followed by the DashboardAPI python files
    def getFiles(self):
        yield pathShare('mon.dashboard.sh', pkg='grid_control_cms')
        for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
            yield pathShare('..', 'DashboardAPI', fn, pkg='grid_control_cms')

    # Returns the local user name taken from the first non-empty variable among
    # LOGNAME, USER, LNAME, USERNAME - or '' when none of them is set.
    # Avoids the KeyError a direct os.environ['LOGNAME'] lookup raises in
    # environments that do not export LOGNAME.
    def _getLocalUsername(self):
        for envVar in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
            username = os.environ.get(envVar)
            if username:
                return username
        return ''

    # Thread body: build the dashboard message for one job and publish it.
    # usermsg is a list of dicts that is merged on top of the id fields.
    def _publish(self, jobObj, jobNum, taskId, usermsg):
        # gcID is split into (ignored prefix, backend, raw id) at the first two dots
        (_, backend, rawId) = jobObj.gcID.split('.', 2)
        dashId = '%s_%s' % (jobNum, rawId)
        if 'http' not in jobObj.gcID:
            # No URL in the id - build one from the backend name
            dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
        msg = mergeDicts([{
            'taskId': taskId,
            'jobId': dashId,
            'sid': rawId
        }] + usermsg)
        # presumably filterDict drops entries whose value is None - confirm
        DashboardAPI(
            taskId,
            dashId).publish(**filterDict(msg, vF=lambda v: v is not None))

    # Resolve the task id for the job and publish the message in a pool thread
    def _start_publish(self, jobObj, jobNum, desc, message):
        taskId = self._task.substVars('dashboard task id',
                                      self._taskname,
                                      jobNum,
                                      addDict={
                                          'DATASETNICK': ''
                                      }).strip('_')
        self._tp.start_thread(
            'Notifying dashboard about %s of job %d' % (desc, jobNum),
            self._publish, jobObj, jobNum, taskId, message)

    # Called on job submission
    def onJobSubmit(self, wms, jobObj, jobNum):
        token = wms.getAccessToken(jobObj.gcID)
        jobInfo = self._task.getJobConfig(jobNum)
        self._start_publish(
            jobObj, jobNum, 'submission',
            [{
                'user': self._getLocalUsername(),
                'GridName': '/CN=%s' % token.getUsername(),
                'CMSUser': token.getUsername(),
                'tool': 'grid-control',
                'JSToolVersion': getVersion(),
                'SubmissionType': 'direct',
                'tool_ui': os.environ.get('HOSTNAME', ''),
                'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
                'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'),
                'taskType': self._tasktype,
                'scheduler': wms.getObjectName(),
                'vo': token.getGroup(),
                'nevtJob': jobInfo.get('MAX_EVENTS', 0),
                'datasetFull': jobInfo.get('DATASETPATH', 'none')
            }])

    # Called on job status update and output.
    # data only needs a get(key, default) method; addMsg is merged into the report.
    def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
        # Translate status into dashboard status message
        statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
        statusMsg = {
            'StatusValue': statusDashboard,
            'StatusValueReason': data.get('reason', statusDashboard).upper(),
            'StatusEnterTime': data.get(
                'timestamp',
                time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
            'StatusDestination': data.get('dest', '')
        }
        self._start_publish(jobObj, jobNum, 'status', [statusMsg, addMsg])

    # Called on job status update.
    # NOTE(review): the job object itself is handed over as `data` and the
    # `data` argument of this hook is ignored - presumably intended, confirm.
    def onJobUpdate(self, wms, jobObj, jobNum, data):
        self._updateDashboard(wms, jobObj, jobNum, jobObj, {})

    # Called on job output - additionally reports the exit code as ExeExitCode
    def onJobOutput(self, wms, jobObj, jobNum, retCode):
        self._updateDashboard(wms, jobObj, jobNum, jobObj,
                              {'ExeExitCode': retCode})

    # Drain the thread pool via wait_and_drop(dashboard timeout)
    def onFinish(self):
        self._tp.wait_and_drop(self._runningMax)
Пример #4
0
class DashBoard(Monitoring):
	# Monitoring plugin that reports job submission and job status changes
	# through DashboardAPI. Every report is published from a GCThreadPool thread.
	configSections = Monitoring.configSections + ['dashboard']

	def __init__(self, config, name, task):
		Monitoring.__init__(self, config, name, task)
		jobDesc = task.getDescription(None) # TODO: use the other variables for monitoring
		# Reported as 'application' when the job config has no SCRAM_PROJECTVERSION;
		# also exported to the job as DB_EXEC (see getTaskConfig)
		self._app = config.get('application', 'shellscript', onChange = None)
		# Passed to the thread pool's wait_and_drop() in onFinish
		self._runningMax = config.getTime('dashboard timeout', 5, onChange = None)
		self._tasktype = config.get('task', jobDesc.jobType or 'analysis', onChange = None)
		# Template for the dashboard task id; variables are substituted per job
		self._taskname = config.get('task name', '@GC_TASK_ID@_@DATASETNICK@', onChange = None)
		# Job states without an entry here are reported as 'PENDING'
		self._statusMap = {Job.DONE: 'DONE', Job.FAILED: 'DONE', Job.SUCCESS: 'DONE',
			Job.RUNNING: 'RUNNING', Job.ABORTED: 'ABORTED', Job.CANCELLED: 'CANCELLED'}
		self._tp = GCThreadPool()


	# Yields the path of the dashboard monitoring shell script
	def getScript(self):
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')


	# Returns the task variables of this plugin; entries supplied by
	# Monitoring.getTaskConfig override the defaults set here
	def getTaskConfig(self):
		result = {'TASK_NAME': self._taskname, 'DB_EXEC': self._app, 'DATASETNICK': ''}
		result.update(Monitoring.getTaskConfig(self))
		return result


	# Yields the monitoring script followed by the DashboardAPI python files
	def getFiles(self):
		yield pathShare('mon.dashboard.sh', pkg = 'grid_control_cms')
		for fn in ('DashboardAPI.py', 'Logger.py', 'apmon.py', 'report.py'):
			yield pathShare('..', 'DashboardAPI', fn, pkg = 'grid_control_cms')


	# Returns the local user name taken from the first non-empty variable among
	# LOGNAME, USER, LNAME, USERNAME - or '' when none of them is set.
	# Avoids the KeyError a direct os.environ['LOGNAME'] lookup raises in
	# environments that do not export LOGNAME.
	def _getLocalUsername(self):
		for envVar in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'):
			username = os.environ.get(envVar)
			if username:
				return username
		return ''


	# Thread body: build the dashboard message for one job and publish it.
	# usermsg is a list of dicts that is merged on top of the id fields.
	def _publish(self, jobObj, jobNum, taskId, usermsg):
		# gcID is split into (ignored prefix, backend, raw id) at the first two dots
		(_, backend, rawId) = jobObj.gcID.split('.', 2)
		dashId = '%s_%s' % (jobNum, rawId)
		if 'http' not in jobObj.gcID:
			# No URL in the id - build one from the backend name
			dashId = '%s_https://%s:/%s' % (jobNum, backend, rawId)
		msg = mergeDicts([{'taskId': taskId, 'jobId': dashId, 'sid': rawId}] + usermsg)
		# presumably filterDict drops entries whose value is None - confirm
		DashboardAPI(taskId, dashId).publish(**filterDict(msg, vF = lambda v: v is not None))


	# Resolve the task id for the job and publish the message in a pool thread
	def _start_publish(self, jobObj, jobNum, desc, message):
		taskId = self._task.substVars('dashboard task id', self._taskname, jobNum,
			addDict = {'DATASETNICK': ''}).strip('_')
		self._tp.start_thread('Notifying dashboard about %s of job %d' % (desc, jobNum),
			self._publish, jobObj, jobNum, taskId, message)


	# Called on job submission
	def onJobSubmit(self, wms, jobObj, jobNum):
		token = wms.getAccessToken(jobObj.gcID)
		jobInfo = self._task.getJobConfig(jobNum)
		self._start_publish(jobObj, jobNum, 'submission', [{
			'user': self._getLocalUsername(), 'GridName': '/CN=%s' % token.getUsername(),
			'CMSUser': token.getUsername(),
			'tool': 'grid-control', 'JSToolVersion': getVersion(),
			'SubmissionType': 'direct', 'tool_ui': os.environ.get('HOSTNAME', ''),
			'application': jobInfo.get('SCRAM_PROJECTVERSION', self._app),
			'exe': jobInfo.get('CMSSW_EXEC', 'shellscript'), 'taskType': self._tasktype,
			'scheduler': wms.getObjectName(), 'vo': token.getGroup(),
			'nevtJob': jobInfo.get('MAX_EVENTS', 0),
			'datasetFull': jobInfo.get('DATASETPATH', 'none')}])


	# Called on job status update and output.
	# data only needs a get(key, default) method; addMsg is merged into the report.
	def _updateDashboard(self, wms, jobObj, jobNum, data, addMsg):
		# Translate status into dashboard status message
		statusDashboard = self._statusMap.get(jobObj.state, 'PENDING')
		statusMsg = {'StatusValue': statusDashboard,
			'StatusValueReason': data.get('reason', statusDashboard).upper(),
			'StatusEnterTime': data.get('timestamp', time.strftime('%Y-%m-%d_%H:%M:%S', time.localtime())),
			'StatusDestination': data.get('dest', '')}
		self._start_publish(jobObj, jobNum, 'status', [statusMsg, addMsg])


	# Called on job status update.
	# NOTE(review): the job object itself is handed over as `data` and the
	# `data` argument of this hook is ignored - presumably intended, confirm.
	def onJobUpdate(self, wms, jobObj, jobNum, data):
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {})


	# Called on job output - additionally reports the exit code as ExeExitCode
	def onJobOutput(self, wms, jobObj, jobNum, retCode):
		self._updateDashboard(wms, jobObj, jobNum, jobObj, {'ExeExitCode': retCode})


	# Drain the thread pool via wait_and_drop(dashboard timeout)
	def onFinish(self):
		self._tp.wait_and_drop(self._runningMax)