Пример #1
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        Report a status change to the dashboard for every job in jobs.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            workflow -> appended to self.taskPrefix to build the task id
            name, retry_count -> combined to build the job id
            *location -> sent as StatusDestination
                         ('NotAvailable' when absent)
            *plugin -> sent as scheduler with its last six characters
                       removed (only when present and truthy)
            *fwjr -> when the key is present the job is also handed
                     to handleSteps

        statusValue and statusMessage are sent unchanged as StatusValue
        and StatusValueReason. A non-zero result from apmonSend is logged
        and otherwise ignored.
        """
        logging.info("Handling %d jobs" % len(jobs))
        logging.debug("Handling jobs: %s" % jobs)

        for jobInfo in jobs:
            logging.debug("Sending info for job %s" % str(jobInfo))

            report = {
                'MessageType': 'JobStatus',
                'jobId': '%s_%i' % (jobInfo['name'], jobInfo['retry_count']),
                'taskId': self.taskPrefix + jobInfo['workflow'],
                'StatusValue': statusValue,
                'StatusValueReason': statusMessage,
                # Timestamp of the report, taken in UTC
                'StatusEnterTime': time.strftime(self.tsFormat,
                                                 time.gmtime()),
                'StatusDestination': jobInfo.get('location', 'NotAvailable'),
            }

            pluginName = jobInfo.get('plugin', None)
            if pluginName:
                report['scheduler'] = pluginName[:-6]

            logging.debug("Sending: %s" % str(report))
            status = apmonSend(taskid=report['taskId'],
                               jobid=report['jobId'],
                               params=report,
                               logr=logging,
                               apmonServer=self.serverreport)

            if status != 0:
                # A failed send is reported in the log and then ignored
                logging.error(
                    "Error %i sending info for submitted job %s via UDP\n"
                    % (status, jobInfo['name']) + "Ignoring")
                logging.debug("Package sent: %s\n" % report)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

            if 'fwjr' in jobInfo:
                self.handleSteps(jobInfo)
Пример #2
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        Publish the new status of each job in jobs to the dashboard.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            workflow -> appended to self.taskPrefix to build the task id
            name, retry_count -> combined to build the job id
            *location -> sent as StatusDestination
                         ('NotAvailable' when absent)
            *plugin -> sent as scheduler with its last six characters
                       removed (only when present and truthy)
            *fwjr -> when the key is present the job is also handed
                     to handleSteps

        statusValue is sent as StatusValue and statusMessage as
        StatusValueReason. Send failures (non-zero apmonSend result) are
        logged and ignored.
        """
        logging.info("Handling %d jobs" % len(jobs))
        logging.debug("Handling jobs: %s" % jobs)

        for entry in jobs:
            logging.debug("Sending info for job %s" % str(entry))

            message = {
                'MessageType': 'JobStatus',
                'jobId': '%s_%i' % (entry['name'], entry['retry_count']),
                'taskId': self.taskPrefix + entry['workflow'],
                'StatusValue': statusValue,
                'StatusValueReason': statusMessage,
                'StatusEnterTime': time.strftime(self.tsFormat,
                                                 time.gmtime()),
                'StatusDestination': entry.get('location', 'NotAvailable'),
            }

            schedulerPlugin = entry.get('plugin', None)
            if schedulerPlugin:
                message['scheduler'] = schedulerPlugin[:-6]

            logging.debug("Sending: %s" % str(message))
            sendResult = apmonSend(taskid=message['taskId'],
                                   jobid=message['jobId'],
                                   params=message,
                                   logr=logging,
                                   apmonServer=self.serverreport)

            if sendResult != 0:
                errorText = ("Error %i sending info for submitted job %s via UDP\n"
                             % (sendResult, entry['name']))
                errorText += "Ignoring"
                logging.error(errorText)
                logging.debug("Package sent: %s\n" % message)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

            # Jobs carrying a framework job report also get their
            # step information published
            if 'fwjr' in entry:
                self.handleSteps(entry)
Пример #3
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Send the post-processing information of each step of a job to the
        dashboard.

        job is a dictionary; the keys read here are:
            fwjr -> report object providing listSteps() and
                    retrieveStep(stepName); nothing is sent when it is None
            name, retry_count -> combined to build the job id
            workflow -> appended to self.taskPrefix to build the task id

        For every step that has a 'counter' attribute, the values returned
        by getPerformanceInformation and getEventInformation are merged,
        None values are dropped and each remaining key is prefixed with
        '<counter>_'. Steps with nothing left to report are skipped.
        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        report = job['fwjr']
        if report is None:
            return

        for stepName in report.listSteps():
            step = report.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            collected = {}
            collected.update(self.getPerformanceInformation(step))
            collected.update(self.getEventInformation(stepName, report))

            # Keep only the values that are set, namespaced by step counter
            package = {'%d_%s' % (counter, key): value
                       for key, value in collected.items()
                       if value is not None}

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" \
                      % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
        # Released once, after all steps of the job have been processed
        apmonFree()

        return
Пример #4
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Publish the post-processing step information of a job.

        job is a dictionary; the keys read here are:
            fwjr -> report object providing listSteps() and
                    retrieveStep(stepName); nothing is sent when it is None
            name, retry_count -> combined to build the job id
            workflow -> appended to self.taskPrefix to build the task id

        Only steps with a 'counter' attribute are reported. The values from
        getPerformanceInformation and getEventInformation are merged, None
        values are discarded and the remaining keys are prefixed with
        '<counter>_'; a step with no remaining values is skipped.
        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        fwjr = job['fwjr']
        if fwjr is None:
            return

        for stepName in fwjr.listSteps():
            step = fwjr.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            stepInfo = {}
            stepInfo.update(self.getPerformanceInformation(step))
            stepInfo.update(self.getEventInformation(stepName, fwjr))

            # Drop unset values and namespace the rest by step counter
            package = {'%d_%s' % (counter, key): value
                       for key, value in stepInfo.items()
                       if value is not None}

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" \
                      % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
        # Released once, after the loop over all steps
        apmonFree()

        return
Пример #5
0
    def handleCreated(self, jobs):
        """
        _handleCreated_

        Publish the meta information of newly created jobs to the dashboard.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            workflow -> appended to self.taskPrefix to build the task id
            name, retry_count -> combined to build the job id
            taskType -> sent as TaskType
            jobType -> sent as JobType
            *nEventsToProc -> sent as NEventsToProcess
                              ('NotAvailable' when absent)

        A non-zero result from apmonSend is logged and otherwise ignored.
        """
        logging.info("Handling %d created jobs" % len(jobs))
        logging.debug("Handling created jobs: %s" % jobs)

        for created in jobs:
            logging.debug("Sending info for job %s" % str(created))

            meta = {
                'MessageType': 'JobMeta',
                'taskId': self.taskPrefix + created['workflow'],
                'jobId': '%s_%i' % (created['name'], created['retry_count']),
                'TaskType': created['taskType'],
                'JobType': created['jobType'],
                'NEventsToProcess': created.get('nEventsToProc',
                                                'NotAvailable'),
            }

            logging.debug("Sending: %s" % str(meta))
            status = apmonSend(taskid=meta['taskId'],
                               jobid=meta['jobId'],
                               params=meta,
                               logr=logging,
                               apmonServer=self.serverreport)
            if status != 0:
                # A failed send is reported in the log and then ignored
                logging.error(
                    "Error %i sending info for submitted job %s via UDP\n"
                    % (status, created['name']) + "Ignoring")
                logging.debug("Package sent: %s\n" % meta)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()
Пример #6
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Send the per-step performance information of a job to the dashboard.

        job is a dictionary; the keys read here are:
            performance -> dictionary mapping each step name to a dictionary
                           with 'memory', 'storage' and 'cpu' sub-dictionaries
            name, retryCount -> combined to build the job id
            requestName -> prefixed with 'wmagent_' to build the task id

        One package is sent per step. A metric missing from its section is
        sent as None. When apmonSend returns non-zero the error is logged,
        self.sendAlert(6, msg=...) is called and processing continues.
        """
        # Metrics read from each section of a step's performance record,
        # listed in the order in which they are added to the package.
        metricsBySection = (
            ('memory', ('PeakValueRss', 'PeakValueVsize')),
            ('storage', ('writeTotalMB', 'readPercentageOps',
                         'readAveragekB', 'readTotalMB', 'readNumOps',
                         'readCachePercentageOps', 'readMBSec',
                         'readMaxMSec', 'readTotalSecs', 'writeTotalSecs')),
            ('cpu', ('TotalJobCPU', 'TotalEventCPU', 'AvgEventCPU',
                     'AvgEventTime', 'MinEventCPU', 'MaxEventTime',
                     'TotalJobTime', 'MinEventTime', 'MaxEventCPU')),
        )

        for stepName, performance in job['performance'].items():
            package = {
                'jobId': '%s_%i' % (job['name'], job['retryCount']),
                'taskId': 'wmagent_%s' % job['requestName'],
                'stepName': stepName,
            }
            for section, metricNames in metricsBySection:
                sectionData = performance[section]
                for metric in metricNames:
                    package[metric] = sectionData.get(metric, None)

            logging.debug("Sending performance info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)
                # The dashboard password is deliberately kept out of the log
                logging.debug("Host info: host %s, port %s"
                              % (self.config.DashboardReporter.dashboardHost,
                                 self.config.DashboardReporter.dashboardPort))
        # Released once, after all steps have been sent
        apmonFree()

        return
Пример #7
0
    def handleCreated(self, jobs):
        """
        _handleCreated_

        Send a JobMeta message to the dashboard for every created job.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            workflow -> appended to self.taskPrefix to build the task id
            name, retry_count -> combined to build the job id
            taskType -> sent as TaskType
            jobType -> sent as JobType
            *nEventsToProc -> sent as NEventsToProcess
                              ('NotAvailable' when absent)

        Send failures (non-zero apmonSend result) are logged and ignored.
        """
        logging.info("Handling %d created jobs" % len(jobs))
        logging.debug("Handling created jobs: %s" % jobs)

        for newJob in jobs:
            logging.debug("Sending info for job %s" % str(newJob))

            payload = {
                'MessageType': 'JobMeta',
                'taskId': self.taskPrefix + newJob['workflow'],
                'jobId': '%s_%i' % (newJob['name'], newJob['retry_count']),
                'TaskType': newJob['taskType'],
                'JobType': newJob['jobType'],
                'NEventsToProcess': newJob.get('nEventsToProc',
                                               'NotAvailable'),
            }

            logging.debug("Sending: %s" % str(payload))
            sendResult = apmonSend(taskid=payload['taskId'],
                                   jobid=payload['jobId'],
                                   params=payload,
                                   logr=logging,
                                   apmonServer=self.serverreport)
            if sendResult != 0:
                errorText = ("Error %i sending info for submitted job %s via UDP\n"
                             % (sendResult, newJob['name']))
                errorText += "Ignoring"
                logging.error(errorText)
                logging.debug("Package sent: %s\n" % payload)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()
Пример #8
0
    def testApmonInstance(self):
        """
        _testApmonInstance_

        Create an apmon instance from APMONCONF, check that it reports a
        successful initialization, then release it with apmonFree and check
        the APMONINIT and APMONINSTANCE names.
        """
        print("Apmon Configuration %s" % APMONCONF)
        instance = getApmonInstance(apmonServer=APMONCONF)
        initialized = instance.initializedOK()
        self.assertTrue(initialized)
        # Release the apmon instance and verify that the release succeeded
        apmonFree()
        self.assertFalse(APMONINIT)
        self.assertEqual(None, APMONINSTANCE)
Пример #9
0
    def testApmonInstance(self):
        """
        _testApmonInstance_

        Check that an apmon instance built from APMONCONF initializes, and
        that APMONINIT is false and APMONINSTANCE is None after apmonFree.
        """
        print("Apmon Configuration %s" % APMONCONF)
        apmonObject = getApmonInstance(apmonServer=APMONCONF)
        isReady = apmonObject.initializedOK()
        self.assertTrue(isReady)
        # Free the apmon instance, then check that it was released
        apmonFree()
        self.assertFalse(APMONINIT)
        self.assertEqual(None, APMONINSTANCE)
Пример #10
0
    def handleJobStatusChange(self, jobs, statusValue, statusMessage):
        """
        _handleJobStatusChange_

        Report a status change to the dashboard for every job in jobs.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            workflow -> appended to self.taskPrefix to build the task id
            name, retry_count -> combined to build the job id
            *location -> sent as StatusDestination
                         ("NotAvailable" when absent)
            *fwjr -> when the key is present the job is also handed
                     to handleSteps

        statusValue is sent as StatusValue and statusMessage as
        StatusValueReason. Send failures (non-zero apmonSend result) are
        logged and ignored.
        """
        logging.info("Handling jobs: %s" % jobs)

        for jobInfo in jobs:
            logging.info("Sending info for job %s" % str(jobInfo))

            report = {
                "MessageType": "JobStatus",
                "jobId": "%s_%i" % (jobInfo["name"], jobInfo["retry_count"]),
                "taskId": self.taskPrefix + jobInfo["workflow"],
                "StatusValue": statusValue,
                "StatusValueReason": statusMessage,
                # Timestamp of the report, taken in UTC
                "StatusEnterTime": time.strftime(self.tsFormat, time.gmtime()),
                "StatusDestination": jobInfo.get("location", "NotAvailable"),
            }

            logging.info("Sending: %s" % str(report))
            status = apmonSend(taskid=report["taskId"],
                               jobid=report["jobId"],
                               params=report,
                               logr=logging,
                               apmonServer=self.serverreport)

            if status != 0:
                logging.error(
                    "Error %i sending info for submitted job %s via UDP\n"
                    % (status, jobInfo["name"]) + "Ignoring")
                logging.debug("Package sent: %s\n" % report)
                logging.debug("Host info: host %s, port %s"
                              % (self.destHost, self.destPort))
            apmonFree()

            if "fwjr" in jobInfo:
                self.handleSteps(jobInfo)
Пример #11
0
    def handleSubmitted(self, jobs):
        """
        _handleSubmitted_

        Send a JobMeta message with status 'submitted' to the dashboard for
        every job in jobs.

        jobs is a list of dictionaries; the keys read here are
        (* marks an optional key):
            requestName -> task name; when it is not in self.taskCache the
                           task is first registered through addTask
            name, retryCount -> combined to build the job id
            jobType -> sent as JobType
            taskType -> sent as TaskType
            *user -> passed to addTask (None when absent)
            *timestamp -> sent as StatusEnterTime (current time when absent)

        When apmonSend returns non-zero the error is logged,
        self.sendAlert(6, msg=...) is called and processing continues.
        """
        logging.info("Handling jobs to be submitted: %s" % jobs)

        for job in jobs:
            taskName = job['requestName']
            if taskName not in self.taskCache:
                self.addTask(name=taskName, user=job.get('user', None))
            logging.info("Sending info for task %s" % str(job))

            package = {
                'jobId': '%s_%i' % (job['name'], job['retryCount']),
                'taskId': 'wmagent_%s' % taskName,
                'GridJobID': 'NotAvailable',
                'retryCount': job['retryCount'],
                'MessageTS': time.time(),
                'MessageType': 'JobMeta',
                'JobType': job['jobType'],
                'TaskType': job['taskType'],
                'StatusValue': 'submitted',
                'scheduler': 'BossAir',
                'StatusEnterTime': job.get('timestamp', time.time()),
            }

            logging.info("Sending: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for submitted job %s via UDP\n" % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)
                # The dashboard password is deliberately kept out of the log
                logging.debug("Host info: host %s, port %s"
                              % (self.config.DashboardReporter.dashboardHost,
                                 self.config.DashboardReporter.dashboardPort))
            apmonFree()
        return
Пример #12
0
    def addTask(self, task):
        """
        _addTask_

        Send a TaskMeta message describing a task to the dashboard.

        task is a dictionary; the keys read here are:
            name -> appended to self.taskPrefix to build TaskName
            application -> sent as application
            nevtJob -> sent as nevtJob
            GridName -> sent as GridName
            scheduler -> sent as scheduler
            TaskType -> sent as TaskType
            datasetFull -> sent as datasetFull
            user -> sent as CMSUser

        tool is always "WMAgent", JSToolVersion is __version__ and JobName
        is "taskMeta". A non-zero result from apmonSend is logged and
        otherwise ignored.
        """
        taskName = task["name"]
        meta = {
            "MessageType": "TaskMeta",
            "application": task["application"],
            "nevtJob": task["nevtJob"],
            "tool": "WMAgent",
            "JSToolVersion": __version__,
            "GridName": task["GridName"],
            "scheduler": task["scheduler"],
            "TaskType": task["TaskType"],
            "TaskName": self.taskPrefix + taskName,
            "JobName": "taskMeta",
            "datasetFull": task["datasetFull"],
            "CMSUser": task["user"],
        }

        logging.debug("Sending task info: %s" % str(meta))
        status = apmonSend(taskid=meta["TaskName"],
                           jobid=meta["JobName"],
                           params=meta,
                           logr=logging,
                           apmonServer=self.serverreport)

        if status != 0:
            # A failed send is reported in the log and then ignored
            logging.error("Error %i sending info for new task %s via UDP\n"
                          % (status, taskName) + "Ignoring")
            logging.debug("Package sent: %s\n" % meta)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()
Пример #13
0
def send_final_dashboard_update(data, config, monalisa):
    """Send the final report of a task to the dashboard.

    data is a dictionary; the keys read here are cpu_time, events_per_run,
    exe_exit_code, task_exit_code, stageout_exit_code,
    output_storage_element and task_timing (itself a dictionary with
    stage_out_end, prologue_end and wrapper_start).

    config['monitoring'] supplies monitorid and taskid; monalisa is passed
    to apmonSend as the server argument.

    ExeTime is stage_out_end - prologue_end; CrabWrapperTime and WCCPU are
    both stage_out_end - wrapper_start. CrabCpuPercentage
    (cpu_time / wrapper time) is added only when it can be computed.
    """
    timing = data['task_timing']

    cputime = data['cpu_time']
    events_per_run = data['events_per_run']
    exe_exit_code = data['exe_exit_code']
    exe_time = timing['stage_out_end'] - timing['prologue_end']
    task_exit_code = data['task_exit_code']
    total_time = timing['stage_out_end'] - timing['wrapper_start']
    stageout_exit_code = data['stageout_exit_code']
    stageout_se = data['output_storage_element']

    logger.debug("Execution time {}".format(total_time))
    logger.debug("Exiting with code {}".format(task_exit_code))
    logger.debug("Reporting ExeExitCode {}".format(exe_exit_code))
    logger.debug("Reporting StageOutSE {}".format(stageout_se))
    logger.debug("Reporting StageOutExitCode {}".format(stageout_exit_code))

    parameters = {
        'ExeTime': str(exe_time),
        'ExeExitCode': str(exe_exit_code),
        'JobExitCode': str(task_exit_code),
        'JobExitReason': '',
        'StageOutSE': stageout_se,
        'StageOutExitStatus': str(stageout_exit_code),
        'StageOutExitStatusReason': 'Copy succedeed with srm-lcg utils',
        'CrabUserCpuTime': str(cputime),
        'CrabWrapperTime': str(total_time),
        'WCCPU': str(total_time),
        'NoEventsPerRun': str(events_per_run),
        'NbEvPerRun': str(events_per_run),
        'NEventsProcessed': str(events_per_run)
    }
    try:
        parameters['CrabCpuPercentage'] = str(
            float(cputime) / float(total_time))
    except (TypeError, ValueError, ZeroDivisionError, OverflowError) as exc:
        # Best effort: the report is still sent without the percentage,
        # but the reason is recorded instead of being silently dropped.
        logger.debug("Not reporting CrabCpuPercentage: {}".format(exc))

    monitorid = str(config['monitoring']['monitorid'])
    taskid = str(config['monitoring']['taskid'])

    apmonSend(taskid, monitorid, parameters, logging.getLogger('mona'),
              monalisa)
    apmonFree()
Пример #14
0
    def publish(self, data, redundancy = 1):
        """
        _publish_

        Send data to the Dashboard through apmonSend, using self.taskName
        and self.jobName as identifiers and self.server as destination,
        then release the apmon instance with apmonFree.

        redundancy is accepted but not used by this method.

        """
        logging.info("About to send UDP package to dashboard: %s" % data)
        logging.info("Using address %s" % self.server)

        sendArguments = {
            'taskid': self.taskName,
            'jobid': self.jobName,
            'params': data,
            'logr': logging,
            'apmonServer': self.server,
        }
        apmonSend(**sendArguments)
        apmonFree()
Пример #15
0
    def addTask(self, task):
        """
        _addTask_

        Send a TaskMeta message describing a task to the dashboard.

        task is a dictionary; the keys read here are:
            name -> appended to self.taskPrefix to build TaskName
            application -> sent as application
            TaskType -> sent as TaskType
            datasetFull -> sent as datasetFull
            user -> sent as CMSUser

        tool is always 'WMAgent', JSToolVersion is __version__ and JobName
        is 'taskMeta'. A non-zero result from apmonSend is logged and
        otherwise ignored.
        """
        taskName = task['name']
        meta = {
            'MessageType': 'TaskMeta',
            'application': task['application'],
            'tool': 'WMAgent',
            'JSToolVersion': __version__,
            'TaskType': task['TaskType'],
            'TaskName': self.taskPrefix + taskName,
            'JobName': 'taskMeta',
            'datasetFull': task['datasetFull'],
            'CMSUser': task['user'],
        }

        logging.info("Sending %s info" % taskName)
        logging.debug("Sending task info: %s" % str(meta))
        status = apmonSend(taskid=meta['TaskName'],
                           jobid=meta['JobName'],
                           params=meta,
                           logr=logging,
                           apmonServer=self.serverreport)

        if status != 0:
            # A failed send is reported in the log and then ignored
            logging.error("Error %i sending info for new task %s via UDP\n"
                          % (status, taskName) + "Ignoring")
            logging.debug("Package sent: %s\n" % meta)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()
Пример #16
0
    def addTask(self, task):
        """
        _addTask_

        Register a task with the dashboard by sending a TaskMeta message.

        task is a dictionary; the keys read here are:
            name -> appended to self.taskPrefix to build TaskName
            application -> sent as application
            TaskType -> sent as TaskType
            datasetFull -> sent as datasetFull
            user -> sent as CMSUser

        tool is always 'WMAgent', JSToolVersion is __version__ and JobName
        is 'taskMeta'. Send failures (non-zero apmonSend result) are logged
        and ignored.
        """
        taskName = task['name']
        payload = {
            'MessageType': 'TaskMeta',
            'application': task['application'],
            'tool': 'WMAgent',
            'JSToolVersion': __version__,
            'TaskType': task['TaskType'],
            'TaskName': self.taskPrefix + taskName,
            'JobName': 'taskMeta',
            'datasetFull': task['datasetFull'],
            'CMSUser': task['user'],
        }

        logging.info("Sending %s info" % taskName)
        logging.debug("Sending task info: %s" % str(payload))
        sendResult = apmonSend(taskid=payload['TaskName'],
                               jobid=payload['JobName'],
                               params=payload,
                               logr=logging,
                               apmonServer=self.serverreport)

        if sendResult != 0:
            errorText = ("Error %i sending info for new task %s via UDP\n"
                         % (sendResult, taskName))
            errorText += "Ignoring"
            logging.error(errorText)
            logging.debug("Package sent: %s\n" % payload)
            logging.debug("Host info: host %s, port %s"
                          % (self.destHost, self.destPort))
        apmonFree()
Пример #17
0
    def publish(self, data, redundancy=1):
        """
        _publish_

        Publish data to the Dashboard via apmonSend. The message is
        identified by self.taskName and self.jobName and is sent to
        self.server; the apmon instance is released afterwards.

        redundancy is accepted but not used by this method.

        """
        destination = self.server
        logging.info("About to send UDP package to dashboard: %s" % data)
        logging.info("Using address %s" % destination)

        apmonSend(taskid=self.taskName, jobid=self.jobName, params=data,
                  logr=logging, apmonServer=self.server)
        apmonFree()
Пример #18
0
    def addTask(self, name, user):
        """
        _addTask_

        Send a TaskMeta message for the task `name` to the dashboard.

        name is sent as Workflow and, prefixed with 'wmagent_', as taskId;
        user is sent as CMSUser. self.agentName is sent as AgentName and
        __version__ as JSToolVersion.

        When apmonSend returns 0 the name is appended to self.taskCache.
        Otherwise the error is logged, self.sendAlert(6, msg=...) is called
        and the task is not added to the cache.
        """
        package = {
            'MessageType': 'TaskMeta',
            'MessageTS': time.time(),
            'taskId': 'wmagent_%s' % name,
            'jobId': 'taskMeta',
            'JSTool': 'WMAgent',
            'JSToolVersion': __version__,
            'CMSUser': user,
            'Workflow': name,
            'AgentName': self.agentName,
        }

        logging.info("Sending info for task %s" % str(name))

        logging.debug("Sending task info: %s" % str(package))
        result = apmonSend(taskid=package['taskId'],
                           jobid=package['jobId'],
                           params=package,
                           logr=logging,
                           apmonServer=self.serverreport)

        if result != 0:
            msg = "Error %i sending info for new task %s via UDP\n" % (result, name)
            msg += "Ignoring"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            logging.debug("Package sent: %s\n" % package)
            # The dashboard password is deliberately kept out of the log
            logging.debug("Host info: host %s, port %s"
                          % (self.config.DashboardReporter.dashboardHost,
                             self.config.DashboardReporter.dashboardPort))
        else:
            # Only tasks that were sent successfully are remembered
            self.taskCache.append(name)

        apmonFree()
Пример #19
0
    def handleCompleted(self, jobs):
        """
        _handleCompleted_

        Send a JobStatus message to the dashboard for every completed job,
        then hand the job to handleSteps.

        jobs is a list of dictionaries; the keys read here are:
            name -> sent as GridJobID and, with retryCount, as the job id
            retryCount -> sent as retryCount
            requestName -> prefixed with 'wmagent_' to build the task id
            finalState -> sent as StatusValue
            timestamp -> sent as StatusEnterTime
            exitCode -> sent as JobExitCode

        When apmonSend returns non-zero the error is logged,
        self.sendAlert(6, msg=...) is called and processing continues.
        """
        logging.info("Handling jobs to be completed: %s" % jobs)

        for job in jobs:
            package = {
                'jobId': '%s_%i' % (job['name'], job['retryCount']),
                'taskId': 'wmagent_%s' % job['requestName'],
                'GridJobID': job['name'],
                'retryCount': job['retryCount'],
                'MessageTS': time.time(),
                'MessageType': 'JobStatus',
                'StatusValue': job['finalState'],
                'StatusEnterTime': job['timestamp'],
                'JobExitCode': job['exitCode'],
            }

            logging.info("Sending completed info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'],
                               jobid=package['jobId'],
                               params=package,
                               logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = "Error %i sending info for completed job %s via UDP\n" % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Package sent: %s\n" % package)
                # The dashboard password is deliberately kept out of the log
                logging.debug("Host info: host %s, port %s"
                              % (self.config.DashboardReporter.dashboardHost,
                                 self.config.DashboardReporter.dashboardPort))
            apmonFree()

            self.handleSteps(job=job)
        return
Пример #20
0
def send_initial_dashboard_update(data, config, monalisa):
    """Send the initial set of job parameters to the dashboard via ApMon.

    The "sync CE" reported is picked in this order:

    1. ``loadSiteLocalConfig().siteName``, unless the environment has
       ``PARROT_ENABLED`` set to ``"TRUE"`` or the lookup raises.
    2. The first of ``GLIDEIN_Gatekeeper``, ``OSG_HOSTNAME`` and
       ``CONDORCE_COLLECTOR_HOST`` that is present in the environment.
    3. ``config['default ce']`` when the last two dot-separated labels of
       the local FQDN match those of ``config['default host']``, else
       ``'Unknown'``.

    ``data`` is not used by this function. ``monalisa`` is handed to
    ``apmonSend`` as its last argument.
    """
    # Dashboard does not like Unicode, just ASCII encoding
    monitoring = config['monitoring']
    monitorid = str(monitoring['monitorid'])
    syncid = str(monitoring['syncid'])
    taskid = str(monitoring['taskid'])

    try:
        if os.environ.get("PARROT_ENABLED", "FALSE") == "TRUE":
            # Jump straight to the fallback lookup below.
            raise ValueError()
        sync_ce = loadSiteLocalConfig().siteName
    except Exception:
        from_environment = [
            os.environ[name]
            for name in ("GLIDEIN_Gatekeeper", "OSG_HOSTNAME",
                         "CONDORCE_COLLECTOR_HOST")
            if name in os.environ
        ]
        if from_environment:
            sync_ce = from_environment[0]
        else:
            local_domain = socket.getfqdn().rsplit('.')[-2:]
            default_domain = config['default host'].rsplit('.')[-2:]
            if local_domain == default_domain:
                sync_ce = config['default ce']
            else:
                sync_ce = 'Unknown'

    logger.info("using sync CE {}".format(sync_ce))

    parameters = dict([
        ('ExeStart', str(config['executable'])),
        ('NCores', config.get('cores', 1)),
        ('SyncCE', sync_ce),
        ('SyncGridJobId', syncid),
        ('WNHostName', socket.getfqdn()),
    ])

    mona_logger = logging.getLogger('mona')
    apmonSend(taskid, monitorid, parameters, mona_logger, monalisa)
    apmonFree()
Пример #21
0
    def publish(self, redundancy = 1, data = None):
        """
        _publish_

        Publish information to the Dashboard using the ApMon interface and
        the destination stored in this instance (self.server).

        data, when given and non-empty, is the mapping to publish; otherwise
        the contents of this object are published.  Entries whose value is
        None are left out of the package.  Neither data nor this object is
        modified.

        redundancy is the number of times to publish this information; it is
        currently unused and the package is sent once.

        """
        # Work on a copy: deleting keys from a dict while iterating over its
        # items() raises RuntimeError on Python 3, and deleting in place
        # would also strip the keys from the caller's data.
        snapshot = {}
        snapshot.update(data if data else self)
        toPublish = dict((key, value) for key, value in snapshot.items()
                         if value is not None)

        logging.debug("About to send UDP package to dashboard: %s", str(toPublish))
        logging.debug("Using address %s", self.server)
        apmonSend(taskid = self.taskName, jobid = self.jobName, params = toPublish,
                  logr = logging, apmonServer = self.server)
        apmonFree()
        return
Пример #22
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information.

        For every step of job['fwjr'] that has a 'counter' attribute, collect
        the performance, event and input-file information, prefix the keys
        with the step counter and send the package to the dashboard.

        job is a dictionary; the keys read here are fwjr, name, retry_count
        and workflow.  Nothing is done when job['fwjr'] is None.  A non-zero
        result from apmonSend is logged and otherwise ignored.
        """
        fwjr = job['fwjr']
        if fwjr is None:
            return

        # Keys that are sent as-is, without the "<counter>_" prefix.
        unprefixedKeys = ('inputFiles', 'Basename', 'inputBlocks')

        for stepName in fwjr.listSteps():
            step = fwjr.retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue

            counter = step.counter

            package = {}

            package.update(self.getPerformanceInformation(step))
            package.update(self.getEventInformation(stepName, fwjr))

            # Input files should just be appended onto inputFiles instead of given a step #
            # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
            inputFilePackage = self.getInputFilesInformation(step)
            if inputFilePackage:
                if 'inputFiles' in package:
                    package['inputFiles'] += ';' +  inputFilePackage['inputFiles']
                else:
                    # Reuse the result fetched above instead of asking the
                    # helper for the same step a second time.
                    package.update(inputFilePackage)

            trimmedPackage = {}
            for key, value in package.items():
                if key in unprefixedKeys:
                    trimmedPackage[key] = value
                elif value is not None:
                    trimmedPackage['%d_%s' % (counter, key)] = value
            package = trimmedPackage

            # Nothing left to report for this step.
            if not package:
                continue

            package['jobId']    = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId']   = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s", str(package))
            result = apmonSend(taskid = package['taskId'],
                               jobid = package['jobId'], params = package,
                               logr = logging, apmonServer = self.serverreport)

            if result != 0:
                msg =  "Error %i sending info for completed job %s via UDP\n" \
                        % (result, job['name'])
                msg += "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n", str(package))
                logging.debug("Host info: host %s, port %s",
                              self.destHost, self.destPort)
            apmonFree()

        return
Пример #23
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information.

        For every step of job["fwjr"] that has a "performance" attribute,
        send one package with the memory, storage and cpu figures of that
        step.  Sections missing from the performance object are created with
        section_(); a step where all three were missing is skipped.  Figures
        that are not present are sent as None.
        """
        if job["fwjr"] == None:
            return

        # (section name, attributes read from it), in the order the keys are
        # added to the package.
        reported = (
            ("memory", ("PeakValueRss", "PeakValuePss", "PeakValueVsize")),
            ("storage", ("writeTotalMB", "readPercentageOps", "readAveragekB",
                         "readTotalMB", "readNumOps", "readCachePercentageOps",
                         "readMBSec", "readMaxMSec", "readTotalSecs",
                         "writeTotalSecs")),
            ("cpu", ("TotalJobCPU", "AvgEventCPU", "MaxEventTime",
                     "AvgEventTime", "MinEventCPU", "TotalEventCPU",
                     "TotalJobTime", "MinEventTime", "MaxEventCPU")),
        )

        for stepName in job["fwjr"].listSteps():
            step = job["fwjr"].retrieveStep(stepName)
            if not hasattr(step, "performance"):
                continue
            performance = step.performance

            # Create whichever sections are absent, counting them as we go.
            absent = 0
            for sectionName, _ in reported:
                if not hasattr(performance, sectionName):
                    performance.section_(sectionName)
                    absent += 1
            # There's nothing to report, get out
            if absent == len(reported):
                continue

            package = {
                "jobId": "%s_%i" % (job["name"], job["retry_count"]),
                "taskId": self.taskPrefix + job["workflow"],
                "stepName": stepName,
            }
            for sectionName, attrNames in reported:
                section = getattr(performance, sectionName)
                for attrName in attrNames:
                    package[attrName] = getattr(section, attrName, None)

            logging.debug("Sending performance info: %s" % str(package))
            result = apmonSend(taskid=package["taskId"], jobid=package["jobId"],
                               params=package, logr=logging,
                               apmonServer=self.serverreport)

            if result == 0:
                continue

            msg = ("Error %i sending info for completed job %s via UDP\n" % (result, job["name"])) + "Ignoring"
            logging.error(msg)
            logging.debug("Package sent: %s\n" % package)
            logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
        apmonFree()

        return
Пример #24
0
    def handleSteps(self, job):
        """
        _handleSteps_

        Handle the post-processing step information.

        Each step of job['fwjr'] that has a 'counter' attribute gets one
        package built from its performance, event and input-file
        information.  Keys other than inputFiles, Basename and inputBlocks
        are prefixed with the step counter, and None values among them are
        dropped.  Steps that end up with an empty package are not sent.
        """
        if job['fwjr'] == None:
            return

        unprefixedKeys = ['inputFiles', 'Basename', 'inputBlocks']

        for stepName in job['fwjr'].listSteps():
            step = job['fwjr'].retrieveStep(stepName)
            if not hasattr(step, 'counter'):
                continue
            counter = step.counter

            collected = {}
            collected.update(self.getPerformanceInformation(step))
            collected.update(self.getEventInformation(stepName, job['fwjr']))

            # Input files should just be appended onto inputFiles instead of given a step #
            # per https://hypernews.cern.ch/HyperNews/CMS/get/comp-monitoring/326.html
            inputFilePackage = self.getInputFilesInformation(step)
            if inputFilePackage and 'inputFiles' in collected:
                collected['inputFiles'] += ';' + inputFilePackage['inputFiles']
            elif inputFilePackage:
                collected.update(self.getInputFilesInformation(step))

            package = {}
            for key, value in collected.items():
                if key in unprefixedKeys:
                    package[key] = value
                elif value != None:
                    package['%d_%s' % (counter, key)] = value

            if not package:
                continue

            package['jobId'] = '%s_%i' % (job['name'], job['retry_count'])
            package['taskId'] = self.taskPrefix + job['workflow']
            package['%d_stepName' % counter] = stepName

            logging.debug("Sending step info: %s" % str(package))
            result = apmonSend(taskid=package['taskId'], jobid=package['jobId'],
                               params=package, logr=logging,
                               apmonServer=self.serverreport)

            if result != 0:
                msg = ("Error %i sending info for completed job %s via UDP\n" % (result, job['name'])) + "Ignoring"
                logging.error(msg)
                logging.debug("Package sent: %s\n" % package)
                logging.debug("Host info: host %s, port %s" % (self.destHost, self.destPort))
            apmonFree()

        return
Пример #25
0
 def free(self):
     """
     Call the apmonFree() helper.

     NOTE(review): presumably this releases the ApMon sender that apmonSend
     uses -- confirm against the module that defines apmonFree.
     """
     apmonFree()