def test_FullChain(self): """ This test will - call all the WMSClient methods that will end up calling all the JobManager service methods - use the JobMonitoring to verify few properties - call the JobCleaningAgent to eliminate job entries from the DBs """ wmsClient = WMSClient() jobMonitor = JobMonitoringClient() jobStateUpdate = JobStateUpdateClient() # create the job job = helloWorldJob() jobDescription = createFile(job) # submit the job res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)) self.assertTrue(res['OK']) self.assertTrue(isinstance(res['Value'], int)) self.assertEqual(res['Value'], res['JobID']) jobID = res['JobID'] jobID = res['Value'] # updating the status res = jobStateUpdate.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source') self.assertTrue(res['OK']) # reset the job res = wmsClient.resetJob(jobID) self.assertTrue(res['OK']) # reschedule the job res = wmsClient.rescheduleJob(jobID) self.assertTrue(res['OK']) res = jobMonitor.getJobStatus(jobID) self.assertTrue(res['OK']) self.assertEqual(res['Value'], 'Received') res = jobMonitor.getJobsMinorStatus([jobID]) self.assertTrue(res['OK']) self.assertEqual( res['Value'], {jobID: { 'MinorStatus': 'Job Rescheduled', 'JobID': jobID }}) res = jobMonitor.getJobsApplicationStatus([jobID]) self.assertTrue(res['OK']) self.assertEqual( res['Value'], {jobID: { 'ApplicationStatus': 'Unknown', 'JobID': jobID }}) # updating the status again res = jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source') self.assertTrue(res['OK']) # kill the job res = wmsClient.killJob(jobID) self.assertTrue(res['OK']) res = jobMonitor.getJobStatus(jobID) self.assertTrue(res['OK']) self.assertEqual(res['Value'], 'Killed') # updating the status aaaagain res = jobStateUpdate.setJobStatus(jobID, 'Done', 'matching', 'source') self.assertTrue(res['OK']) # kill the job res = wmsClient.killJob(jobID) self.assertTrue(res['OK']) res = jobMonitor.getJobStatus(jobID) self.assertTrue(res['OK']) self.assertEqual(res['Value'], 'Done') # this time it won't kill... it's done! # delete the job - this will just set its status to "deleted" res = wmsClient.deleteJob(jobID) self.assertTrue(res['OK']) res = jobMonitor.getJobStatus(jobID) self.assertTrue(res['OK']) self.assertEqual(res['Value'], 'Deleted')
def test_FullChain(self): """This test will - call all the WMSClient methods that will end up calling all the JobManager service methods - use the JobMonitoring to verify few properties - call the JobCleaningAgent to eliminate job entries from the DBs """ wmsClient = WMSClient() jobMonitor = JobMonitoringClient() jobStateUpdate = JobStateUpdateClient() # create the job job = helloWorldJob() jobDescription = createFile(job) # submit the job res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)) self.assertTrue(res["OK"], res.get("Message")) self.assertTrue(isinstance(res["Value"], int), msg="Got %s" % type(res["Value"])) self.assertEqual(res["Value"], res["JobID"], msg="Got %s, expected %s" % (str(res["Value"]), res["JobID"])) jobID = res["JobID"] jobID = res["Value"] # updating the status res = jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING, "Executing Minchiapp", "source") self.assertTrue(res["OK"], res.get("Message")) # reset the job res = wmsClient.resetJob(jobID) self.assertTrue(res["OK"], res.get("Message")) # reschedule the job res = wmsClient.rescheduleJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.RECEIVED, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsMinorStatus([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"], {jobID: { "MinorStatus": "Job Rescheduled" }}, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsApplicationStatus([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"], {jobID: { "ApplicationStatus": "Unknown" }}, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsStates([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual( res["Value"], { jobID: { "Status": JobStatus.RECEIVED, "MinorStatus": "Job Rescheduled", "ApplicationStatus": "Unknown" } }, msg="Got %s" % str(res["Value"]), ) # updating the status again res = jobStateUpdate.setJobStatus(jobID, JobStatus.CHECKING, "checking", "source") self.assertTrue(res["OK"], res.get("Message")) res = jobStateUpdate.setJobStatus(jobID, JobStatus.WAITING, "waiting", "source") self.assertTrue(res["OK"], res.get("Message")) res = jobStateUpdate.setJobStatus(jobID, JobStatus.MATCHED, "matched", "source") self.assertTrue(res["OK"], res.get("Message")) # kill the job res = wmsClient.killJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.KILLED, msg="Got %s" % str(res["Value"])) # delete the job - this will just set its status to "deleted" res = wmsClient.deleteJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.DELETED, msg="Got %s" % str(res["Value"]))
result.setdefault(job, {})['Status'] = status result[job]['Node'] = node result[job]['LocalJobID'] = batchID wnJobs[node] = wnJobs.setdefault(node, 0) + 1 # If necessary get jobs' status statusCounters = {} if allJobs: allJobs = sorted(allJobs, reverse=True) res = monitoring.getJobsStatus(allJobs) if res['OK']: jobStatus = res['Value'] res = monitoring.getJobsMinorStatus(allJobs) if res['OK']: jobMinorStatus = res['Value'] res = monitoring.getJobsApplicationStatus(allJobs) if res['OK']: jobApplicationStatus = res['Value'] if not res['OK']: gLogger.error('Error getting job parameter', res['Message']) else: for job in allJobs: stat = jobStatus.get(job, {}).get('Status', 'Unknown') + '; ' + \ jobMinorStatus.get(job, {}).get('MinorStatus', 'Unknown') + '; ' + \ jobApplicationStatus.get(job, {}).get('ApplicationStatus', 'Unknown') result[job]['Status'] = stat statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1 elif not workerNodes and not batchIDs: allJobs = sorted(jobs, reverse=True) # Print out result
def main(): site = 'BOINC.World.org' status = ["Running"] minorStatus = None workerNodes = None since = None date = 'today' full = False until = None batchIDs = None Script.registerSwitch('', 'Site=', ' Select site (default: %s)' % site) Script.registerSwitch('', 'Status=', ' Select status (default: %s)' % status) Script.registerSwitch('', 'MinorStatus=', ' Select minor status') Script.registerSwitch('', 'WorkerNode=', ' Select WN') Script.registerSwitch('', 'BatchID=', ' Select batch jobID') Script.registerSwitch( '', 'Since=', ' Date since when to select jobs, or number of days (default: today)' ) Script.registerSwitch('', 'Date=', ' Specify the date (check for a full day)') Script.registerSwitch( '', 'Full', ' Printout full list of job (default: False except if --WorkerNode)') Script.parseCommandLine() from DIRAC import gLogger from DIRAC.Interfaces.API.Dirac import Dirac from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient switches = Script.getUnprocessedSwitches() for switch in switches: if switch[0] == 'Site': site = switch[1] elif switch[0] == 'MinorStatus': minorStatus = switch[1] elif switch[0] == 'Status': if switch[1].lower() == 'all': status = [None] else: status = switch[1].split(',') elif switch[0] == 'WorkerNode': workerNodes = switch[1].split(',') elif switch[0] == 'BatchID': try: batchIDs = [int(id) for id in switch[1].split(',')] except BaseException: gLogger.error('Invalid jobID', switch[1]) DIRAC.exit(1) elif switch[0] == 'Full': full = True elif switch[0] == 'Date': since = switch[1].split()[0] until = str( datetime.datetime.strptime(since, '%Y-%m-%d') + datetime.timedelta(days=1)).split()[0] elif switch[0] == 'Since': date = switch[1].lower() if date == 'today': since = None elif date == 'yesterday': since = 1 elif date == 'ever': since = 2 * 365 elif date.isdigit(): since = int(date) date += ' days' else: since = date if isinstance(since, int): since = str(datetime.datetime.now() - datetime.timedelta(days=since)).split()[0] if workerNodes or batchIDs: # status = [None] full = True monitoring = JobMonitoringClient() dirac = Dirac() # Get jobs according to selection jobs = set() for stat in status: res = dirac.selectJobs(site=site, date=since, status=stat, minorStatus=minorStatus) if not res['OK']: gLogger.error('Error selecting jobs', res['Message']) DIRAC.exit(1) allJobs = set(int(job) for job in res['Value']) if until: res = dirac.selectJobs(site=site, date=until, status=stat) if not res['OK']: gLogger.error('Error selecting jobs', res['Message']) DIRAC.exit(1) allJobs -= set(int(job) for job in res['Value']) jobs.update(allJobs) if not jobs: gLogger.always('No jobs found...') DIRAC.exit(0) # res = monitoring.getJobsSummary( jobs ) # print eval( res['Value'] )[jobs[0]] allJobs = set() result = {} wnJobs = {} gLogger.always('%d jobs found' % len(jobs)) # Get host name for job in jobs: res = monitoring.getJobParameter(job, 'HostName') node = res.get('Value', {}).get('HostName', 'Unknown') res = monitoring.getJobParameter(job, 'LocalJobID') batchID = res.get('Value', {}).get('LocalJobID', 'Unknown') if workerNodes: if not [wn for wn in workerNodes if node.startswith(wn)]: continue allJobs.add(job) if batchIDs: if batchID not in batchIDs: continue allJobs.add(job) if full or status == [None]: allJobs.add(job) result.setdefault(job, {})['Status'] = status result[job]['Node'] = node result[job]['LocalJobID'] = batchID wnJobs[node] = wnJobs.setdefault(node, 0) + 1 # If necessary get jobs' status statusCounters = {} if allJobs: allJobs = sorted(allJobs, reverse=True) res = monitoring.getJobsStatus(allJobs) if res['OK']: jobStatus = res['Value'] res = monitoring.getJobsMinorStatus(allJobs) if res['OK']: jobMinorStatus = res['Value'] res = monitoring.getJobsApplicationStatus(allJobs) if res['OK']: jobApplicationStatus = res['Value'] if not res['OK']: gLogger.error('Error getting job parameter', res['Message']) else: for job in allJobs: stat = jobStatus.get(job, {}).get('Status', 'Unknown') + '; ' + \ jobMinorStatus.get(job, {}).get('MinorStatus', 'Unknown') + '; ' + \ jobApplicationStatus.get(job, {}).get('ApplicationStatus', 'Unknown') result[job]['Status'] = stat statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1 elif not workerNodes and not batchIDs: allJobs = sorted(jobs, reverse=True) # Print out result if workerNodes or batchIDs: gLogger.always('Found %d jobs at %s, WN %s (since %s):' % (len(allJobs), site, workerNodes, date)) if allJobs: gLogger.always('List of jobs:', ','.join([str(job) for job in allJobs])) else: if status == [None]: gLogger.always('Found %d jobs at %s (since %s):' % (len(allJobs), site, date)) for stat in sorted(statusCounters): gLogger.always('%d jobs %s' % (statusCounters[stat], stat)) else: gLogger.always('Found %d jobs %s at %s (since %s):' % (len(allJobs), status, site, date)) gLogger.always( 'List of WNs:', ','.join([ '%s (%d)' % (node, wnJobs[node]) for node in sorted( wnJobs, cmp=(lambda n1, n2: (wnJobs[n2] - wnJobs[n1]))) ])) if full: if workerNodes or batchIDs: nodeJobs = {} for job in allJobs: status = result[job]['Status'] node = result[job]['Node'].split('.')[0] jobID = result[job].get('LocalJobID') nodeJobs.setdefault(node, []).append((jobID, job, status)) if not workerNodes: workerNodes = sorted(nodeJobs) for node in workerNodes: for job in nodeJobs.get(node.split('.')[0], []): gLogger.always('%s ' % node + '(%s): %s - %s' % job) else: for job in allJobs: status = result[job]['Status'] node = result[job]['Node'] jobID = result[job].get('LocalJobID') gLogger.always('%s (%s): %s - %s' % (node, jobID, job, status))