def handleJobStatusChange(self, jobs, statusValue, statusMessage):
    """
    _handleJobStatusChange_

    Publish a status change (submitted, completed, killed, ...) for every
    job in jobs to the dashboard.

    jobs is a list of dictionaries; the keys read here are
    (* denotes an optional key):

    workflow -> Name of the workflow
    name -> Unique name of the job
    retry_count -> Retry count of the job
    *location -> Sent as StatusDestination ('NotAvailable' if absent)
    *plugin -> Sent as scheduler with its last six characters removed
    *fwjr -> When the key is present the job is handed to handleSteps

    statusValue and statusMessage are sent as StatusValue and
    StatusValueReason of every job.
    """
    logging.info("Handling %d jobs" % len(jobs))
    logging.debug("Handling jobs: %s" % jobs)

    for job in jobs:
        logging.debug("Sending info for job %s" % str(job))

        package = {
            'MessageType': 'JobStatus',
            'jobId': '%s_%i' % (job['name'], job['retry_count']),
            'taskId': self.taskPrefix + job['workflow'],
            'StatusValue': statusValue,
            'StatusValueReason': statusMessage,
            'StatusEnterTime': time.strftime(self.tsFormat, time.gmtime()),
            'StatusDestination': job.get('location', 'NotAvailable'),
        }
        pluginName = job.get('plugin', None)
        if pluginName:
            # NOTE(review): presumably drops a trailing "Plugin" from the
            # plugin name - confirm against the plugin naming scheme
            package['scheduler'] = pluginName[:-6]

        logging.debug("Sending: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            failure = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

        if 'fwjr' in job:
            self.handleSteps(job)

    return
def handleJobStatusChange(self, jobs, statusValue, statusMessage):
    """
    _handleJobStatusChange_

    Send a JobStatus message to the dashboard for each job whose status
    changed (submitted, completed, killed, ...).

    jobs is a list of dictionaries; the keys read here are
    (* denotes an optional key):

    workflow -> Name of the workflow
    name -> Unique name of the job
    retry_count -> Retry count of the job
    *location -> Sent as StatusDestination ('NotAvailable' if absent)
    *plugin -> Sent as scheduler with its last six characters removed
    *fwjr -> When the key is present the job is handed to handleSteps

    statusValue and statusMessage are sent as StatusValue and
    StatusValueReason of every job.
    """
    logging.info("Handling %d jobs" % len(jobs))
    logging.debug("Handling jobs: %s" % jobs)

    for job in jobs:
        logging.debug("Sending info for job %s" % str(job))

        package = {
            'MessageType': 'JobStatus',
            'jobId': '%s_%i' % (job['name'], job['retry_count']),
            'taskId': self.taskPrefix + job['workflow'],
            'StatusValue': statusValue,
            'StatusValueReason': statusMessage,
            'StatusEnterTime': time.strftime(self.tsFormat, time.gmtime()),
            'StatusDestination': job.get('location', 'NotAvailable'),
        }
        pluginName = job.get('plugin', None)
        if pluginName:
            # NOTE(review): presumably drops a trailing "Plugin" from the
            # plugin name - confirm against the plugin naming scheme
            package['scheduler'] = pluginName[:-6]

        logging.debug("Sending: %s" % str(package))
        sendArgs = dict(taskid=package['taskId'],
                        jobid=package['jobId'],
                        params=package,
                        logr=logging,
                        apmonServer=self.serverreport)
        result = apmonSend(**sendArgs)

        if result != 0:
            # A failed send is only reported, never raised
            failure = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

        if 'fwjr' in job:
            self.handleSteps(job)

    return
def handleSteps(self, job):
    """
    _handleSteps_

    Publish the post-processing information of every step of a job to the
    dashboard.

    Reads job['fwjr'] (nothing is done when it is None), job['name'],
    job['retry_count'] and job['workflow'].  Steps that have no 'counter'
    attribute are skipped, as are steps for which no non-None value was
    collected.  Every collected key is prefixed with the step counter.
    """
    report = job['fwjr']
    if report is None:
        return

    for stepName in report.listSteps():
        step = report.retrieveStep(stepName)
        if not hasattr(step, 'counter'):
            continue
        counter = step.counter

        collected = {}
        collected.update(self.getPerformanceInformation(step))
        collected.update(self.getEventInformation(stepName, report))

        # Drop unset values and namespace the remaining keys by step counter
        package = {'%d_%s' % (counter, key): value
                   for key, value in collected.items()
                   if value is not None}
        if not package:
            continue

        package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
        package['taskId'] = self.taskPrefix + job['workflow']
        package['%d_stepName' % counter] = stepName

        logging.debug("Sending step info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" \
                  % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def handleSteps(self, job):
    """
    _handleSteps_

    Send the per-step post-processing information of a job to the
    dashboard.

    Reads job['fwjr'] (nothing is done when it is None), job['name'],
    job['retry_count'] and job['workflow'].  Steps that have no 'counter'
    attribute are skipped, as are steps for which no non-None value was
    collected.  Every collected key is prefixed with the step counter.
    """
    report = job['fwjr']
    if report is None:
        return

    for stepName in report.listSteps():
        step = report.retrieveStep(stepName)
        if not hasattr(step, 'counter'):
            continue
        counter = step.counter

        collected = {}
        collected.update(self.getPerformanceInformation(step))
        collected.update(self.getEventInformation(stepName, report))

        # Keep only values that are set; keys become "<counter>_<key>"
        package = {'%d_%s' % (counter, key): value
                   for key, value in collected.items()
                   if value is not None}
        if not package:
            continue

        package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
        package['taskId'] = self.taskPrefix + job['workflow']
        package['%d_stepName' % counter] = stepName

        logging.debug("Sending step info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" \
                  % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def handleCreated(self, jobs):
    """
    _handleCreated_

    Publish the meta information of newly created jobs to the dashboard.

    jobs is a list of dictionaries; the keys read here are
    (* denotes an optional key):

    workflow -> Name of the workflow
    name -> Unique name of the job
    retry_count -> Retry count of the job
    taskType -> Workflow type (analysis, production, etc...)
    jobType -> Job type (merge, processing, etc...)
    *nEventsToProc -> Number of events the job will process, sent as
                      NEventsToProcess ('NotAvailable' if absent)
    """
    logging.info("Handling %d created jobs" % len(jobs))
    logging.debug("Handling created jobs: %s" % jobs)

    for job in jobs:
        logging.debug("Sending info for job %s" % str(job))

        package = {
            'MessageType': 'JobMeta',
            'taskId': self.taskPrefix + job['workflow'],
            'jobId': '%s_%i' % (job['name'], job['retry_count']),
            'TaskType': job['taskType'],
            'JobType': job['jobType'],
            'NEventsToProcess': job.get('nEventsToProc', 'NotAvailable'),
        }

        logging.debug("Sending: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            failure = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def handleSteps(self, job):
    """
    _handleSteps_

    Send the performance information of every step of a job to the
    dashboard.

    job['performance'] maps a step name to a dictionary with the
    sub-dictionaries 'memory', 'storage' and 'cpu'; metrics missing from a
    sub-dictionary are sent as None.  job['name'], job['retryCount'] and
    job['requestName'] are read as well.
    """
    # Metrics are copied in this order, grouped by the section they live in
    metricsBySection = (
        ('memory', ('PeakValueRss', 'PeakValueVsize')),
        ('storage', ('writeTotalMB', 'readPercentageOps', 'readAveragekB',
                     'readTotalMB', 'readNumOps', 'readCachePercentageOps',
                     'readMBSec', 'readMaxMSec', 'readTotalSecs',
                     'writeTotalSecs')),
        ('cpu', ('TotalJobCPU', 'TotalEventCPU', 'AvgEventCPU',
                 'AvgEventTime', 'MinEventCPU', 'MaxEventTime',
                 'TotalJobTime', 'MinEventTime', 'MaxEventCPU')),
    )

    for stepName, performance in job['performance'].items():
        package = {
            'jobId': '%s_%i' % (job['name'], job['retryCount']),
            'taskId': 'wmagent_%s' % job['requestName'],
            'stepName': stepName,
        }
        for sectionName, metricNames in metricsBySection:
            section = performance[sectionName]
            for metricName in metricNames:
                package[metricName] = section.get(metricName, None)

        logging.debug("Sending performance info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is reported and alerted on, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            logging.debug("Package sent: %s\n" % package)
            reporterConfig = self.config.DashboardReporter
            # NOTE(review): this debug line writes the dashboard password
            # to the log
            logging.debug("Host info: host %s, port %s, pass %s"
                          % (reporterConfig.dashboardHost,
                             reporterConfig.dashboardPort,
                             getattr(reporterConfig, 'dashboardPass', '')))
        apmonFree()

    return
def handleCreated(self, jobs):
    """
    _handleCreated_

    Send a JobMeta message to the dashboard for each newly created job.

    jobs is a list of dictionaries; the keys read here are
    (* denotes an optional key):

    workflow -> Name of the workflow
    name -> Unique name of the job
    retry_count -> Retry count of the job
    taskType -> Workflow type (analysis, production, etc...)
    jobType -> Job type (merge, processing, etc...)
    *nEventsToProc -> Number of events the job will process, sent as
                      NEventsToProcess ('NotAvailable' if absent)
    """
    logging.info("Handling %d created jobs" % len(jobs))
    logging.debug("Handling created jobs: %s" % jobs)

    for job in jobs:
        logging.debug("Sending info for job %s" % str(job))

        package = {
            'MessageType': 'JobMeta',
            'taskId': self.taskPrefix + job['workflow'],
            'jobId': '%s_%i' % (job['name'], job['retry_count']),
            'TaskType': job['taskType'],
            'JobType': job['jobType'],
            'NEventsToProcess': job.get('nEventsToProc', 'NotAvailable'),
        }

        logging.debug("Sending: %s" % str(package))
        sendArgs = dict(taskid=package['taskId'],
                        jobid=package['jobId'],
                        params=package,
                        logr=logging,
                        apmonServer=self.serverreport)
        result = apmonSend(**sendArgs)

        if result != 0:
            # A failed send is only reported, never raised
            failure = "Error %i sending info for submitted job %s via UDP\n" \
                      % (result, job['name'])
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def testApmonInstance(self):
    """
    _testApmonInstance_

    Check that an apmon instance obtained for APMONCONF reports a
    successful initialization, and verify the module state after
    apmonFree() has been called.
    """
    print("Apmon Configuration %s" % APMONCONF)

    instance = getApmonInstance(apmonServer=APMONCONF)
    self.assertTrue(instance.initializedOK())

    # Free the apmon instance and check that this was successful
    apmonFree()
    self.assertFalse(APMONINIT)
    self.assertEqual(None, APMONINSTANCE)
def testApmonInstance(self):
    """
    _testApmonInstance_

    Initialize an apmon instance for APMONCONF, assert that it reports a
    successful initialization, then free it and assert on the module
    state.
    """
    print("Apmon Configuration %s" % APMONCONF)

    instance = getApmonInstance(apmonServer=APMONCONF)
    initialized = instance.initializedOK()
    self.assertTrue(initialized)

    # Free the apmon instance and check that this was successful
    apmonFree()
    self.assertFalse(APMONINIT)
    self.assertEqual(None, APMONINSTANCE)
def handleJobStatusChange(self, jobs, statusValue, statusMessage):
    """
    _handleJobStatusChange_

    Publish a status change (submitted, completed, killed, ...) for every
    job in jobs to the dashboard.

    jobs is a list of dictionaries; the keys read here are
    (* denotes an optional key):

    workflow -> Name of the workflow
    name -> Unique name of the job
    retry_count -> Retry count of the job
    *location -> Sent as StatusDestination ("NotAvailable" if absent)
    *fwjr -> When the key is present the job is handed to handleSteps

    statusValue and statusMessage are sent as StatusValue and
    StatusValueReason of every job.
    """
    logging.info("Handling jobs: %s" % jobs)

    for job in jobs:
        logging.info("Sending info for job %s" % str(job))

        package = {
            "MessageType": "JobStatus",
            "jobId": "%s_%i" % (job["name"], job["retry_count"]),
            "taskId": self.taskPrefix + job["workflow"],
            "StatusValue": statusValue,
            "StatusValueReason": statusMessage,
            "StatusEnterTime": time.strftime(self.tsFormat, time.gmtime()),
            "StatusDestination": job.get("location", "NotAvailable"),
        }

        logging.info("Sending: %s" % str(package))
        sendArgs = dict(
            taskid=package["taskId"],
            jobid=package["jobId"],
            params=package,
            logr=logging,
            apmonServer=self.serverreport,
        )
        result = apmonSend(**sendArgs)

        if result != 0:
            # A failed send is only reported, never raised
            failure = "Error %i sending info for submitted job %s via UDP\n" % (result, job["name"])
            logging.error(failure + "Ignoring")
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
        apmonFree()

        if "fwjr" in job:
            self.handleSteps(job)

    return
def handleSubmitted(self, jobs):
    """
    _handleSubmitted_

    Send a 'submitted' JobMeta message to the dashboard via UDP for each
    job.  A task whose requestName is not yet in self.taskCache is
    announced through addTask first.

    Keys read from each job: requestName, name, retryCount, jobType,
    taskType and, optionally, user and timestamp (the current time is
    used when timestamp is absent).
    """
    logging.info("Handling jobs to be submitted: %s" % jobs)

    for job in jobs:
        taskName = job['requestName']
        if taskName not in self.taskCache:
            self.addTask(name=taskName, user=job.get('user', None))

        logging.info("Sending info for task %s" % str(job))

        package = {
            'jobId': '%s_%i' % (job['name'], job['retryCount']),
            'taskId': 'wmagent_%s' % taskName,
            'GridJobID': 'NotAvailable',
            'retryCount': job['retryCount'],
            'MessageTS': time.time(),
            'MessageType': 'JobMeta',
            'JobType': job['jobType'],
            'TaskType': job['taskType'],
            'StatusValue': 'submitted',
            'scheduler': 'BossAir',
            'StatusEnterTime': job.get('timestamp', time.time()),
        }

        logging.info("Sending: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is reported and alerted on, never raised
            msg = "Error %i sending info for submitted job %s via UDP\n" % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            logging.debug("Package sent: %s\n" % package)
            reporterConfig = self.config.DashboardReporter
            # NOTE(review): this debug line writes the dashboard password
            # to the log
            logging.debug("Host info: host %s, port %s, pass %s"
                          % (reporterConfig.dashboardHost,
                             reporterConfig.dashboardPort,
                             getattr(reporterConfig, 'dashboardPass', '')))
        apmonFree()

    return
def addTask(self, task):
    """
    _addTask_

    Send a TaskMeta message describing a task to the dashboard.

    Keys read from task:

    name -> Task name, sent as TaskName with self.taskPrefix prepended
    application -> Sent as application
    nevtJob -> Sent as nevtJob
    GridName -> Sent as GridName
    scheduler -> Sent as scheduler
    TaskType -> Sent as TaskType
    datasetFull -> Sent as datasetFull
    user -> Sent as CMSUser

    tool is always "WMAgent" and JSToolVersion is __version__.
    """
    taskName = task["name"]

    package = {
        "MessageType": "TaskMeta",
        "application": task["application"],
        "nevtJob": task["nevtJob"],
        "tool": "WMAgent",
        "JSToolVersion": __version__,
        "GridName": task["GridName"],
        "scheduler": task["scheduler"],
        "TaskType": task["TaskType"],
        "TaskName": self.taskPrefix + taskName,
        "JobName": "taskMeta",
        "datasetFull": task["datasetFull"],
        "CMSUser": task["user"],
    }

    logging.debug("Sending task info: %s" % str(package))
    sendArgs = dict(
        taskid=package["TaskName"],
        jobid=package["JobName"],
        params=package,
        logr=logging,
        apmonServer=self.serverreport,
    )
    result = apmonSend(**sendArgs)

    if result != 0:
        # A failed send is only reported, never raised
        failure = "Error %i sending info for new task %s via UDP\n" % (result, taskName)
        logging.error(failure + "Ignoring")
        logging.debug("Package sent: %s\n" % package)
        logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
    apmonFree()
def send_final_dashboard_update(data, config, monalisa):
    """Send the final job report to the dashboard.

    Reads 'cpu_time', 'events_per_run', 'exe_exit_code', 'task_exit_code',
    'stageout_exit_code', 'output_storage_element' and the 'task_timing'
    entries 'wrapper_start', 'prologue_end' and 'stage_out_end' from
    `data`, and 'monitorid' and 'taskid' from `config['monitoring']`.
    `monalisa` is passed to apmonSend as the server argument.

    'CrabCpuPercentage' is reported on a best-effort basis: it is left out
    when the CPU time or total time cannot be converted or divided (for
    example a total time of zero).
    """
    cputime = data['cpu_time']
    events_per_run = data['events_per_run']
    exe_exit_code = data['exe_exit_code']
    timing = data['task_timing']
    exe_time = timing['stage_out_end'] - timing['prologue_end']
    task_exit_code = data['task_exit_code']
    total_time = timing['stage_out_end'] - timing['wrapper_start']
    stageout_exit_code = data['stageout_exit_code']
    stageout_se = data['output_storage_element']

    logger.debug("Execution time {}".format(total_time))
    logger.debug("Exiting with code {}".format(task_exit_code))
    logger.debug("Reporting ExeExitCode {}".format(exe_exit_code))
    logger.debug("Reporting StageOutSE {}".format(stageout_se))
    logger.debug("Reporting StageOutExitCode {}".format(stageout_exit_code))

    parameters = {
        'ExeTime': str(exe_time),
        'ExeExitCode': str(exe_exit_code),
        'JobExitCode': str(task_exit_code),
        'JobExitReason': '',
        'StageOutSE': stageout_se,
        'StageOutExitStatus': str(stageout_exit_code),
        'StageOutExitStatusReason': 'Copy succedeed with srm-lcg utils',
        'CrabUserCpuTime': str(cputime),
        'CrabWrapperTime': str(total_time),
        'WCCPU': str(total_time),
        'NoEventsPerRun': str(events_per_run),
        'NbEvPerRun': str(events_per_run),
        'NEventsProcessed': str(events_per_run),
    }

    try:
        parameters['CrabCpuPercentage'] = str(float(cputime) / float(total_time))
    except (ArithmeticError, TypeError, ValueError) as err:
        # Only conversion and division failures are expected here; the
        # percentage is optional, so note why it is missing and carry on.
        logger.debug("Not reporting CrabCpuPercentage: {}".format(err))

    monitorid = str(config['monitoring']['monitorid'])
    taskid = str(config['monitoring']['taskid'])

    apmonSend(taskid, monitorid, parameters, logging.getLogger('mona'), monalisa)
    apmonFree()
def publish(self, data, redundancy = 1):
    """
    _publish_

    Send data to the dashboard through the ApMon interface, using the
    task name, job name and server stored in this instance.

    redundancy is accepted but not used by this implementation.
    """
    logging.info("About to send UDP package to dashboard: %s" % data)
    logging.info("Using address %s" % self.server)

    sendArgs = dict(taskid=self.taskName,
                    jobid=self.jobName,
                    params=data,
                    logr=logging,
                    apmonServer=self.server)
    apmonSend(**sendArgs)
    apmonFree()
    return
def addTask(self, task):
    """
    _addTask_

    Send a TaskMeta message describing a task to the dashboard.

    Keys read from task:

    name -> Task name, sent as TaskName with self.taskPrefix prepended
    application -> Sent as application
    TaskType -> Sent as TaskType
    datasetFull -> Sent as datasetFull
    user -> Sent as CMSUser

    tool is always 'WMAgent' and JSToolVersion is __version__.
    """
    taskName = task['name']

    package = {
        'MessageType': 'TaskMeta',
        'application': task['application'],
        'tool': 'WMAgent',
        'JSToolVersion': __version__,
        'TaskType': task['TaskType'],
        'TaskName': self.taskPrefix + taskName,
        'JobName': 'taskMeta',
        'datasetFull': task['datasetFull'],
        'CMSUser': task['user'],
    }

    logging.info("Sending %s info" % taskName)
    logging.debug("Sending task info: %s" % str(package))
    sendArgs = dict(taskid=package['TaskName'],
                    jobid=package['JobName'],
                    params=package,
                    logr=logging,
                    apmonServer=self.serverreport)
    result = apmonSend(**sendArgs)

    if result != 0:
        # A failed send is only reported, never raised
        failure = "Error %i sending info for new task %s via UDP\n" % (
            result, taskName)
        logging.error(failure + "Ignoring")
        logging.debug("Package sent: %s\n" % package)
        logging.debug("Host info: host %s, port %s"
                      % (self.destHost, self.destPort))
    apmonFree()
def addTask(self, task):
    """
    _addTask_

    Announce a task to the dashboard with a TaskMeta message.

    Keys read from task:

    name -> Task name, sent as TaskName with self.taskPrefix prepended
    application -> Sent as application
    TaskType -> Sent as TaskType
    datasetFull -> Sent as datasetFull
    user -> Sent as CMSUser

    tool is always 'WMAgent' and JSToolVersion is __version__.
    """
    taskName = task['name']

    package = {
        'MessageType': 'TaskMeta',
        'application': task['application'],
        'tool': 'WMAgent',
        'JSToolVersion': __version__,
        'TaskType': task['TaskType'],
        'TaskName': self.taskPrefix + taskName,
        'JobName': 'taskMeta',
        'datasetFull': task['datasetFull'],
        'CMSUser': task['user'],
    }

    logging.info("Sending %s info" % taskName)
    logging.debug("Sending task info: %s" % str(package))
    result = apmonSend(taskid=package['TaskName'],
                       jobid=package['JobName'],
                       params=package,
                       logr=logging,
                       apmonServer=self.serverreport)

    if result != 0:
        # A failed send is only reported, never raised
        failure = "Error %i sending info for new task %s via UDP\n" % (result, taskName)
        logging.error(failure + "Ignoring")
        logging.debug("Package sent: %s\n" % package)
        logging.debug("Host info: host %s, port %s"
                      % (self.destHost, self.destPort))
    apmonFree()
def publish(self, data, redundancy=1):
    """
    _publish_

    Send data to the dashboard through the ApMon interface, addressed
    with the task name, job name and server held by this instance.

    redundancy is accepted but not used by this implementation.
    """
    logging.info("About to send UDP package to dashboard: %s" % data)
    logging.info("Using address %s" % self.server)

    sendArgs = dict(taskid=self.taskName,
                    jobid=self.jobName,
                    params=data,
                    logr=logging,
                    apmonServer=self.server)
    apmonSend(**sendArgs)
    apmonFree()
    return
def addTask(self, name, user):
    """
    _addTask_

    Announce a task to the dashboard with a TaskMeta message.

    name is sent as Workflow and, prefixed with 'wmagent_', as taskId;
    user is sent as CMSUser.  When the send succeeds, name is appended to
    self.taskCache; when it fails, the error is logged and an alert is
    raised through sendAlert.
    """
    package = {
        'MessageType': 'TaskMeta',
        'MessageTS': time.time(),
        'taskId': 'wmagent_%s' % name,
        'jobId': 'taskMeta',
        'JSTool': 'WMAgent',
        'JSToolVersion': __version__,
        'CMSUser': user,
        'Workflow': name,
        'AgentName': self.agentName,
    }

    logging.info("Sending info for task %s" % str(name))
    logging.debug("Sending task info: %s" % str(package))
    result = apmonSend(taskid=package['taskId'],
                       jobid=package['jobId'],
                       params=package,
                       logr=logging,
                       apmonServer=self.serverreport)

    if result == 0:
        # Remember the task only once the dashboard has been told about it
        self.taskCache.append(name)
    else:
        msg = "Error %i sending info for new task %s via UDP\n" % (result, name)
        msg += "Ignoring"
        logging.error(msg)
        self.sendAlert(6, msg=msg)
        logging.debug("Package sent: %s\n" % package)
        reporterConfig = self.config.DashboardReporter
        # NOTE(review): this debug line writes the dashboard password to
        # the log
        logging.debug("Host info: host %s, port %s, pass %s"
                      % (reporterConfig.dashboardHost,
                         reporterConfig.dashboardPort,
                         getattr(reporterConfig, 'dashboardPass', '')))

    apmonFree()
def handleCompleted(self, jobs):
    """
    _handleCompleted_

    Send a JobStatus message to the dashboard via UDP for each completed
    job, then hand the job to handleSteps.

    Keys read from each job: name, retryCount, requestName, finalState,
    timestamp and exitCode.
    """
    logging.info("Handling jobs to be completed: %s" % jobs)

    for job in jobs:
        package = {
            'jobId': '%s_%i' % (job['name'], job['retryCount']),
            'taskId': 'wmagent_%s' % job['requestName'],
            'GridJobID': job['name'],
            'retryCount': job['retryCount'],
            'MessageTS': time.time(),
            'MessageType': 'JobStatus',
            'StatusValue': job['finalState'],
            'StatusEnterTime': job['timestamp'],
            'JobExitCode': job['exitCode'],
        }

        logging.info("Sending completed info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is reported and alerted on, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            logging.debug("Package sent: %s\n" % package)
            reporterConfig = self.config.DashboardReporter
            # NOTE(review): this debug line writes the dashboard password
            # to the log
            logging.debug("Host info: host %s, port %s, pass %s"
                          % (reporterConfig.dashboardHost,
                             reporterConfig.dashboardPort,
                             getattr(reporterConfig, 'dashboardPass', '')))
        apmonFree()

        self.handleSteps(job=job)

    return
def send_initial_dashboard_update(data, config, monalisa):
    """Send the initial job report to the dashboard.

    The sync CE is taken from the site-local configuration unless the
    PARROT_ENABLED environment variable is "TRUE" or loading that
    configuration fails.  In those cases the first of GLIDEIN_Gatekeeper,
    OSG_HOSTNAME and CONDORCE_COLLECTOR_HOST found in the environment is
    used; if none is set, config['default ce'] is used when the last two
    dot-separated parts of this host's name match those of
    config['default host'], and 'Unknown' otherwise.

    `data` is not read here; `monalisa` is passed to apmonSend as the
    server argument.
    """
    def fallback_sync_ce():
        """Return the sync CE from the environment or the host name."""
        for envvar in ("GLIDEIN_Gatekeeper", "OSG_HOSTNAME",
                       "CONDORCE_COLLECTOR_HOST"):
            if envvar in os.environ:
                return os.environ[envvar]
        host = socket.getfqdn()
        default_host = config['default host']
        if host.rsplit('.')[-2:] == default_host.rsplit('.')[-2:]:
            return config['default ce']
        return 'Unknown'

    # Dashboard does not like Unicode, just ASCII encoding
    monitoring = config['monitoring']
    monitorid = str(monitoring['monitorid'])
    syncid = str(monitoring['syncid'])
    taskid = str(monitoring['taskid'])

    if os.environ.get("PARROT_ENABLED", "FALSE") == "TRUE":
        sync_ce = fallback_sync_ce()
    else:
        try:
            sync_ce = loadSiteLocalConfig().siteName
        except Exception:
            sync_ce = fallback_sync_ce()

    logger.info("using sync CE {}".format(sync_ce))

    parameters = {
        'ExeStart': str(config['executable']),
        'NCores': config.get('cores', 1),
        'SyncCE': sync_ce,
        'SyncGridJobId': syncid,
        'WNHostName': socket.getfqdn()
    }

    apmonSend(taskid, monitorid, parameters, logging.getLogger('mona'), monalisa)
    apmonFree()
def publish(self, redundancy = 1, data = None):
    """
    _publish_

    Publish information to the Dashboard using the ApMon interface and
    the task name, job name and server stored in this instance.

    When data is given (and not empty) it is published; otherwise the
    contents of this object are published.  Entries whose value is None
    are left out.  The filtering is done on a copy, so neither data nor
    this object is modified.

    redundancy is accepted for interface compatibility but is not used by
    this implementation.
    """
    source = {}
    source.update(data if data else self)

    # Build a new dict rather than deleting in place: removing keys while
    # iterating over items() raises RuntimeError on Python 3.
    toPublish = {key: value for key, value in source.items()
                 if value is not None}

    logging.debug("About to send UDP package to dashboard: %s" % toPublish)
    logging.debug("Using address %s" % self.server)
    apmonSend(taskid=self.taskName,
              jobid=self.jobName,
              params=toPublish,
              logr=logging,
              apmonServer=self.server)
    apmonFree()
    return
def handleSteps(self, job):
    """
    _handleSteps_

    Publish the post-processing information of every step of a job to the
    dashboard.

    Reads job['fwjr'] (nothing is done when it is None), job['name'],
    job['retry_count'] and job['workflow'].  Steps that have no 'counter'
    attribute are skipped, as are steps for which nothing was collected.
    Collected keys are prefixed with the step counter, except for
    'inputFiles', 'Basename' and 'inputBlocks', which are sent unprefixed
    and are kept even when their value is None.
    """
    report = job['fwjr']
    if report is None:
        return

    unprefixedKeys = ('inputFiles', 'Basename', 'inputBlocks')

    for stepName in report.listSteps():
        step = report.retrieveStep(stepName)
        if not hasattr(step, 'counter'):
            continue
        counter = step.counter

        collected = {}
        collected.update(self.getPerformanceInformation(step))
        collected.update(self.getEventInformation(stepName, report))

        # Input files should just be appended onto inputFiles instead of given a step #
        # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
        inputFilePackage = self.getInputFilesInformation(step)
        if inputFilePackage:
            if 'inputFiles' in collected:
                collected['inputFiles'] += ';' + inputFilePackage['inputFiles']
            else:
                # Reuse the result fetched above instead of asking again
                collected.update(inputFilePackage)

        package = {}
        for key, value in collected.items():
            if key in unprefixedKeys:
                package[key] = value
            elif value is not None:
                package['%d_%s' % (counter, key)] = value
        if not package:
            continue

        package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
        package['taskId'] = self.taskPrefix + job['workflow']
        package['%d_stepName' % counter] = stepName

        logging.debug("Sending step info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" \
                  % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def handleSteps(self, job):
    """
    _handleSteps_

    Publish the performance information of every step of a job to the
    dashboard.

    Reads job["fwjr"] (nothing is done when it is None), job["name"],
    job["retry_count"] and job["workflow"].  Steps that have no
    "performance" attribute are skipped.  A missing "memory", "storage"
    or "cpu" section is created on the step's performance object through
    section_(); when all three were missing the step is skipped.  Metrics
    that are not set on a section are sent as None.
    """
    report = job["fwjr"]
    if report is None:
        return

    # Metrics are copied in this order, grouped by the section they live in
    metricsBySection = (
        ("memory", ("PeakValueRss", "PeakValuePss", "PeakValueVsize")),
        ("storage", ("writeTotalMB", "readPercentageOps", "readAveragekB",
                     "readTotalMB", "readNumOps", "readCachePercentageOps",
                     "readMBSec", "readMaxMSec", "readTotalSecs",
                     "writeTotalSecs")),
        ("cpu", ("TotalJobCPU", "AvgEventCPU", "MaxEventTime",
                 "AvgEventTime", "MinEventCPU", "TotalEventCPU",
                 "TotalJobTime", "MinEventTime", "MaxEventCPU")),
    )

    for stepName in report.listSteps():
        step = report.retrieveStep(stepName)
        if not hasattr(step, "performance"):
            continue
        performance = step.performance

        toReport = len(metricsBySection)
        for sectionName, _ in metricsBySection:
            if not hasattr(performance, sectionName):
                performance.section_(sectionName)
                toReport -= 1

        # There's nothing to report, get out
        if not toReport:
            continue

        package = {
            "jobId": "%s_%i" % (job["name"], job["retry_count"]),
            "taskId": self.taskPrefix + job["workflow"],
            "stepName": stepName,
        }
        for sectionName, metricNames in metricsBySection:
            section = getattr(performance, sectionName)
            for metricName in metricNames:
                package[metricName] = getattr(section, metricName, None)

        logging.debug("Sending performance info: %s" % str(package))
        result = apmonSend(
            taskid=package["taskId"],
            jobid=package["jobId"],
            params=package,
            logr=logging,
            apmonServer=self.serverreport,
        )

        if result != 0:
            # A failed send is only reported, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" % (result, job["name"])
            msg += "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
        apmonFree()

    return
def handleSteps(self, job):
    """
    _handleSteps_

    Send the per-step post-processing information of a job to the
    dashboard.

    Reads job['fwjr'] (nothing is done when it is None), job['name'],
    job['retry_count'] and job['workflow'].  Steps that have no 'counter'
    attribute are skipped, as are steps for which nothing was collected.
    Collected keys are prefixed with the step counter, except for
    'inputFiles', 'Basename' and 'inputBlocks', which are sent unprefixed
    and are kept even when their value is None.
    """
    report = job['fwjr']
    if report is None:
        return

    unprefixedKeys = ('inputFiles', 'Basename', 'inputBlocks')

    for stepName in report.listSteps():
        step = report.retrieveStep(stepName)
        if not hasattr(step, 'counter'):
            continue
        counter = step.counter

        collected = {}
        collected.update(self.getPerformanceInformation(step))
        collected.update(self.getEventInformation(stepName, report))

        # Input files should just be appended onto inputFiles instead of given a step #
        # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
        inputFilePackage = self.getInputFilesInformation(step)
        if inputFilePackage:
            if 'inputFiles' in collected:
                collected['inputFiles'] += ';' + inputFilePackage['inputFiles']
            else:
                # Reuse the result fetched above instead of asking again
                collected.update(inputFilePackage)

        package = {}
        for key, value in collected.items():
            if key in unprefixedKeys:
                package[key] = value
            elif value is not None:
                package['%d_%s' % (counter, key)] = value
        if not package:
            continue

        package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
        package['taskId'] = self.taskPrefix + job['workflow']
        package['%d_stepName' % counter] = stepName

        logging.debug("Sending step info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            # A failed send is only reported, never raised
            msg = "Error %i sending info for completed job %s via UDP\n" \
                  % (result, job['name'])
            msg += "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()

    return
def free(self):
    """
    _free_

    Delegate to the module-level apmonFree().
    """
    apmonFree()
    return