Пример #1
0
 def _cl_job_status_message(self, status, status_source, status_start_time=None):
     """Assemble the job-status message dictionary on the client.

     On the client ``self.job_info`` is a Job object. Destination site,
     worker node and every exit code/reason field are left as None here.
     The fields noted as "not null" for this message are EXECUTION_BACKEND,
     GRIDJOBID, JOB_ID_INSIDE_THE_TASK, TASKNAME and UNIQUEJOBID.

     status -- stored under STATENAME, e.g. submitted, Done (Success)
     status_source -- stored under STATESOURCE, e.g. Ganga, LB
     status_start_time -- stored under STATESTARTTIME (None by default)
     """
     job = self.job_info
     return {
         # actual CE, e.g. ce-3-fzk.gridka.de:2119/jobmanager-pbspro-atlasXS
         'DESTCE': LCGUtil.cl_dest_ce(job),
         # actual site (e.g. FZK-LCG2); not filled in here
         'DESTSITE': None,
         # actual worker node hostname (e.g. c01-102-103.gridka.de); not filled in here
         'DESTWN': None,
         # backend, e.g. LCG
         'EXECUTION_BACKEND': LCGUtil.cl_execution_backend(job),
         # grid exit code, e.g. 0; not filled in here
         'GRIDEXITCODE': None,
         # grid exit reason, e.g. Job terminated successfully; not filled in here
         'GRIDEXITREASON': None,
         # e.g. https://grid-lb0.desy.de:9000/moqY5njFGurEuoDkkJmtBA
         'GRIDJOBID': LCGUtil.cl_grid_job_id(job),
         # job exit code, e.g. 0; not filled in here
         'JOBEXITCODE': None,
         # job exit reason; not filled in here
         'JOBEXITREASON': None,
         # subjob id, e.g. 0
         'JOB_ID_INSIDE_THE_TASK': LCGUtil.cl_job_id_inside_the_task(job),
         # grid certificate, e.g. /DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=dtuckett/CN=671431/CN=David Tuckett/CN=proxy
         'OWNERDN': LCGUtil.cl_ownerdn(),
         # this message is reported by the client tool (other reporter: JobWN)
         'REPORTER': 'ToolUI',
         # e.g. 2009-11-25T14:59:24.754249Z
         'REPORTTIME': CommonUtil.utcnow(),
         'STATENAME': status,
         'STATESOURCE': status_source,
         # e.g. 2009-11-25T14:32:51.428988Z
         'STATESTARTTIME': status_start_time,
         # e.g. ganga:6702b50a-8a31-4476-8189-62ea5b8e00b3:TrigStudy
         'TASKNAME': LCGUtil.cl_task_name(job),
         # Ganga uuid, e.g. 1c08ff3b-904f-4f77-a481-d6fa765813cb
         'UNIQUEJOBID': LCGUtil.cl_unique_job_id(job),
         '___fqid': job.fqid,
     }
Пример #2
0
 def start(self, **opts):
     """Report the start of the job from the worker node.

     Writes a debug log entry and sends a 'running' job-status message
     (source 'Ganga') to the configured job-status destination.
     """
     # on the worker node job_info is a dictionary
     job_fields = self.job_info
     self._log('debug', 'start %s' % job_fields['fqid'])
     # the state start time is the moment this method runs
     running_message = self._wn_job_status_message(
         'running', 'Ganga', CommonUtil.utcnow())
     self._send(
         self.config_info['destination_job_status'], running_message)
Пример #3
0
 def _wn_job_status_message(self, status, status_source, status_start_time):
     """Assemble the job-status message dictionary on the worker node.

     On the worker node ``self.job_info`` is a dictionary; several fields
     are copied from it unchanged. All exit code/reason fields are left as
     None here. The fields noted as "not null" for this message are
     EXECUTION_BACKEND, GRIDJOBID, JOB_ID_INSIDE_THE_TASK, TASKNAME and
     UNIQUEJOBID.

     status -- stored under STATENAME
     status_source -- stored under STATESOURCE
     status_start_time -- stored under STATESTARTTIME
     """
     info = self.job_info
     return {
         'DESTCE': LCGUtil.wn_dest_ce(info),
         'DESTSITE': LCGUtil.wn_dest_site(info),
         'DESTWN': LCGUtil.wn_dest_wn(),
         'EXECUTION_BACKEND': info['EXECUTION_BACKEND'],
         # exit codes and reasons are not filled in here
         'GRIDEXITCODE': None,
         'GRIDEXITREASON': None,
         'GRIDJOBID': LCGUtil.wn_grid_job_id(info),
         'JOBEXITCODE': None,
         'JOBEXITREASON': None,
         'JOB_ID_INSIDE_THE_TASK': info['JOB_ID_INSIDE_THE_TASK'],
         'OWNERDN': info['OWNERDN'],
         # this message is reported from the worker node
         'REPORTER': 'JobWN',
         'REPORTTIME': CommonUtil.utcnow(),
         'STATENAME': status,
         'STATESOURCE': status_source,
         'STATESTARTTIME': status_start_time,
         'TASKNAME': info['TASKNAME'],
         'UNIQUEJOBID': info['UNIQUEJOBID'],
         '___fqid': info['fqid'],
     }
Пример #4
0
 def _cl_task_meta_message(self):
     """Assemble the task-meta message dictionary on the client.

     On the client ``self.job_info`` is a Job object. Values come from
     AthenaUtil, from ``self.dynamic_util`` and from fixed strings.
     """
     job = self.job_info
     meta = {}
     meta['APPLICATION'] = AthenaUtil.cl_application(job)  # e.g. ATHENA
     meta['APPLICATIONVERSION'] = AthenaUtil.cl_application_version(job)  # e.g. 15.5.1
     # e.g. fdr08_run2.0052283.physics_Muon.merge.AOD.o3_f8_m10
     meta['INPUTDATASET'] = AthenaUtil.cl_input_dataset(job)
     meta['JSTOOL'] = 'Ganga'  # other tools report e.g. Panda
     # hostname of the client, e.g. lxplus246.cern.ch
     meta['JSTOOLUI'] = AthenaUtil.cl_jstoolui()
     # unknown at submission,
     # e.g. user09.DavidTuckett.ganga.420.20091125.FZK-LCG2_SCRATCHDISK
     meta['OUTPUTDATASET'] = AthenaUtil.cl_output_dataset(job)
     # unknown at submission, e.g. FZK-LCG2_SCRATCHDISK
     meta['OUTPUTSE'] = AthenaUtil.cl_output_se(job)
     # grid certificate, e.g. /DC=ch/DC=cern/OU=Organic
     # Units/OU=Users/CN=dtuckett/CN=671431/CN=David Tuckett/CN=proxy
     meta['OWNERDN'] = self.dynamic_util.cl_ownerdn()
     meta['REPORTER'] = 'ToolUI'  # reported by the client (other reporter: JobWN)
     meta['REPORTTIME'] = CommonUtil.utcnow()  # e.g. 2009-11-25T14:59:24.754249Z
     meta['SUBMISSIONTYPE'] = 'direct'
     # e.g. CE_xxx,SITE_CSCS-LCG2_DATADISK,SITE_DESY-ZN_DATADISK
     meta['TARGET'] = AthenaUtil.cl_target(job)
     # e.g. ganga:6702b50a-8a31-4476-8189-62ea5b8e00b3:TrigStudy
     meta['TASKNAME'] = self.dynamic_util.cl_task_name(job)
     # e.g. analysis, production, hammercloud etc.
     meta['TASKTYPE'] = AthenaUtil.cl_task_type(self.config_info)
     meta['___fqid'] = job.fqid
     return meta
Пример #5
0
 def _wn_job_processing_attributes_message(self):
     """Assemble the job-processing-attributes message on the worker node.

     On the worker node ``self.job_info`` is a dictionary. Processing
     figures are read with ``get`` from the object returned by
     ``AthenaUtil.wn_load_athena_stats()``.
     """
     info = self.job_info
     stats = AthenaUtil.wn_load_athena_stats()
     return {
         # e.g. https://grid-lb0.desy.de:9000/moqY5njFGurEuoDkkJmtBA
         'GRIDJOBID': self.dynamic_util.wn_grid_job_id(info),
         'JOB_ID_INSIDE_THE_TASK': info['JOB_ID_INSIDE_THE_TASK'],  # subjob id, e.g. 0
         'NEVENTSPROCESSED': stats.get('totalevents'),  # events processed, e.g. 100
         'NFILESPROCESSED': stats.get('numfiles'),  # files processed, e.g. 2
         'REPORTER': 'JobWN',  # reported from the worker node (other reporter: ToolUI)
         'REPORTTIME': CommonUtil.utcnow(),  # e.g. 2009-11-25T14:59:24.754249Z
         'SYSTEMTIME': stats.get('systemtime'),  # system cpu seconds, e.g. 38.45
         # task name exactly as stored in job_info
         'TASKNAME': info['TASKNAME'],
         # Ganga uuid, e.g. 1c08ff3b-904f-4f77-a481-d6fa765813cb
         'UNIQUEJOBID': info['UNIQUEJOBID'],
         'USERTIME': stats.get('usertime'),  # user cpu seconds, e.g. 479.0
         'WALLCLOCK': stats.get('wallclock'),  # wallclock seconds, e.g. 1040
         '___fqid': info['fqid'],
     }
Пример #6
0
 def _cl_job_meta_message(self):
     """Assemble the job-meta message dictionary on the client.

     On the client ``self.job_info`` is a Job object. The job is always
     reported as a non-pilot job (PILOT 0, PILOTNAME None).
     """
     job = self.job_info
     meta = {}
     # e.g. https://grid-lb0.desy.de:9000/moqY5njFGurEuoDkkJmtBA
     meta['GRIDJOBID'] = self.dynamic_util.cl_grid_job_id(job)
     # e.g. fdr08_run2.0052283.physics_Muon.merge.AOD.o3_f8_m10
     meta['INPUTDATASET'] = AthenaUtil.cl_input_dataset(job)
     # subjob id, e.g. 0
     meta['JOB_ID_INSIDE_THE_TASK'] = self.dynamic_util.cl_job_id_inside_the_task(job)
     # None or a non-negative number, e.g. 100
     meta['NEVENTSREQUESTED'] = AthenaUtil.cl_nevents_requested(job)
     # e.g. user09.DavidTuckett.ganga.420.20091125.FZK-LCG2_SCRATCHDISK
     meta['OUTPUTDATASET'] = AthenaUtil.cl_output_dataset(job)
     # unknown at submission, e.g. FZK-LCG2_SCRATCHDISK
     meta['OUTPUTSE'] = AthenaUtil.cl_output_se(job)
     meta['PILOT'] = 0  # 0 = not pilot, 1 = pilot
     meta['PILOTNAME'] = None
     meta['REPORTER'] = 'ToolUI'  # reported by the client (other reporter: JobWN)
     meta['REPORTTIME'] = CommonUtil.utcnow()  # e.g. 2009-11-25T14:59:24.754249Z
     # e.g. CE_xxx,SITE_CSCS-LCG2_DATADISK,SITE_DESY-ZN_DATADISK
     meta['TARGET'] = AthenaUtil.cl_target(job)
     # e.g. ganga:6702b50a-8a31-4476-8189-62ea5b8e00b3:TrigStudy
     meta['TASKNAME'] = self.dynamic_util.cl_task_name(job)
     # Ganga uuid, e.g. 1c08ff3b-904f-4f77-a481-d6fa765813cb
     meta['UNIQUEJOBID'] = self.dynamic_util.cl_unique_job_id(job)
     meta['___fqid'] = job.fqid
     return meta
Пример #7
0
 def stop(self, exitcode, **opts):
     """Report the end of the job from the worker node.

     Writes a debug log entry and sends a job-status message (source
     'Ganga') whose state is 'completed' for exit code 0 and 'failed' for
     any other exit code. The exit code is stored under JOBEXITCODE.
     """
     # on the worker node job_info is a dictionary
     job_fields = self.job_info
     self._log('debug', 'stop %s' % job_fields['fqid'])
     outcome = 'completed' if exitcode == 0 else 'failed'
     final_message = self._wn_job_status_message(
         outcome, 'Ganga', CommonUtil.utcnow())
     final_message['JOBEXITCODE'] = exitcode
     # TODO: no exit reason is available at this point
     final_message['JOBEXITREASON'] = None
     self._send(self.config_info['destination_job_status'], final_message)
Пример #8
0
 def fail(self, **opts):
     """Report a failed job from the client.

     Writes a debug log entry. For a job that has subjobs (a master wrapper
     job) only a second debug entry is written. Otherwise a 'failed'
     job-status message with source 'LB' is sent, carrying the grid exit
     code and reason obtained from ``self.dynamic_util``.
     """
     # on the client job_info is a Job object
     job = self.job_info
     self._log('debug', 'fail %s' % job.fqid)
     if job.subjobs:
         # master wrapper jobs do not produce a message of their own
         self._log('debug', 'Not sending unwanted message on fail for master wrapper job %s.' % job.fqid)
     else:
         failure = self._cl_job_status_message('failed', 'LB', CommonUtil.utcnow())
         failure['GRIDEXITCODE'] = self.dynamic_util.cl_grid_exit_code(job)
         failure['GRIDEXITREASON'] = self.dynamic_util.cl_grid_exit_reason(job)
         self._send(self.config_info['destination_job_status'], failure)
Пример #9
0
 def submit(self, **opts):
     """Report a submitted job from the client.

     Writes a debug log entry. Nothing is sent for a job that has subjobs
     (a master wrapper job). Otherwise a 'submitted' job-status message
     with source 'Ganga' is built and sent, unless its GRIDJOBID is None.
     """
     # on the client job_info is a Job object
     job = self.job_info
     self._log('debug', 'submit %s' % job.fqid)
     if job.subjobs:
         # master wrapper jobs do not produce a message of their own
         self._log('debug', 'Not sending unwanted message on submit for master wrapper job %s.' % job.fqid)
         return
     submitted = self._cl_job_status_message('submitted', 'Ganga', CommonUtil.utcnow())
     if submitted['GRIDJOBID'] is not None:
         self._send(self.config_info['destination_job_status'], submitted)
     else:
         # The temporary workaround in
         # LCG.master_bulk_updateMonitoringInformation() causes two submit
         # messages, one of them without a grid_job_id; that one is dropped.
         self._log('debug', 'Not sending redundant message on submit without grid_job_id for job %s.' % job.fqid)
Пример #10
0
    def submit(self, **opts):
        """Log submit event on client and record monitoring links on the job.

        For a job that has subjobs (a master wrapper job) only debug entries
        are written and the method returns early. Otherwise a 'submitted'
        job-status message (source 'Ganga') is sent unless its GRIDJOBID is
        None. Afterwards a dashboard link, built from a user name taken from
        the grid proxy subject and from the task name, is stored in
        ``info.monitoring_links`` of the job, or of its master if it has
        one. For a backend whose class is named 'Panda' and whose first
        build job has a URL, a 'panda' link is stored as well.
        """
        j = self.job_info  # called on client, so job_info is Job object
        self._log('debug', 'submit %s' % j.fqid)
        # ignore master wrapper jobs
        if j.subjobs:
            self._log(
                'debug', 'Not sending unwanted message on submit for master wrapper job %s.' % j.fqid)
            return
        # send Ganga submitted job-status message
        message = self._cl_job_status_message(
            'submitted', 'Ganga', CommonUtil.utcnow())
        if message['GRIDJOBID'] is None:
            # This is to handle the temporary workaround in
            # IBackend.master_bulk_updateMonitoringInformation() which results in two
            # submit messages being sent, one without a grid_job_id.
            self._log(
                'debug', 'Not sending redundant message on submit without grid_job_id for job %s.' % j.fqid)
        else:
            self._send(self.config_info['destination_job_status'], message)

        # the links are attached to the master job when there is one
        if j.master:
            j = j.master

        from Ganga.GPIDev import Credentials
        proxy = Credentials.getCredential('GridProxy')
        # presumably the subject DN of the proxy, or a text containing
        # 'ERROR' on failure -- confirm against the Credentials API
        ownerdn = proxy.info('-subject')

        # placeholder that stays in place when no user name can be extracted
        user = '******'
        # if no error in the proxy -> get the second CN value from right to
        # left
        if 'ERROR' not in ownerdn:
            last_cn = ownerdn.rfind('CN=')
            if last_cn > -1:
                # cut off the right-most CN and the separator in front of it;
                # max() keeps the slice end from wrapping around to -1 when
                # the DN starts with 'CN='
                subownerdn = ownerdn[:max(last_cn - 1, 0)]
                user_cn = subownerdn.rfind('CN=')
                # rfind returns -1 when there is no second CN; slicing from
                # -1 + 3 would then pick an arbitrary piece of the DN, so the
                # placeholder is kept instead
                if user_cn > -1:
                    user = subownerdn[user_cn + 3:].replace(' ', '')

        task_name = 'ganga:%s:%s' % (j.info.uuid, j.name,)
        task_mon_link = "http://dashb-atlas-jobdev.cern.ch/dashboard/templates/index.html#user=%s&from=&till=&timeRange=lastDay&refresh=0&tid=%s&p=1&uparam[]=all" % (
            user, task_name)

        if j.backend.__class__.__name__ == 'Panda' and len(j.backend.buildjobs) > 0 and j.backend.buildjobs[0].url is not None:
            j.info.monitoring_links = [
                (task_mon_link, 'dashboard'), (j.backend.buildjobs[0].url, 'panda')]
        else:
            j.info.monitoring_links = [(task_mon_link, 'dashboard')]