def test_ParametricChain(self):
    """Submit one parametric job and follow the three jobs it expands into.

    The generated jobs are checked by name, moved to 'Done', deleted in bulk
    and finally verified to be reported as 'Deleted'.
    """
    submitter = WMSClient()
    stateUpdater = JobStateUpdateClient()
    monitor = JobMonitoringClient()

    # build the parametric job and its XML description
    parametric = parametricJob()
    xmlDescription = createFile(parametric)

    # one submission is expected to return three job IDs
    result = submitter.submitJob(parametric._toJDL(xmlFile=xmlDescription))
    self.assertTrue(result['OK'])
    jobIDList = result['Value']
    self.assertEqual(len(jobIDList), 3)

    # each generated job carries the parameter index in its name
    result = monitor.getJobsParameters(jobIDList, ['JobName'])
    self.assertTrue(result['OK'])
    foundNames = {parameters['JobName'] for parameters in result['Value'].values()}
    expectedNames = {'parametric_helloWorld_%s' % index for index in range(3)}
    self.assertEqual(foundNames, expectedNames)

    for jobID in jobIDList:
        result = stateUpdater.setJobStatus(jobID, 'Done', 'matching', 'source')
        self.assertTrue(result['OK'])

    # bulk deletion takes the whole list at once
    result = submitter.deleteJob(jobIDList)
    self.assertTrue(result['OK'])

    for jobID in jobIDList:
        result = monitor.getJobStatus(jobID)
        self.assertTrue(result['OK'])
        self.assertEqual(result['Value'], 'Deleted')
def test_ParametricChain(self):
    """Check the life cycle of a parametric job that yields 3 real jobs.

    Steps: submit, verify the three job names, set every job to 'Done',
    delete them all and confirm the 'Deleted' status.
    """
    client = WMSClient()
    updater = JobStateUpdateClient()
    watcher = JobMonitoringClient()

    # job creation
    theJob = parametricJob()
    descriptionFile = createFile(theJob)

    # job submission
    jdl = theJob._toJDL(xmlFile=descriptionFile)
    result = client.submitJob(jdl)
    self.assertTrue(result['OK'])
    jobIDList = result['Value']
    self.assertEqual(len(jobIDList), 3)

    # names of the generated jobs
    result = watcher.getJobsParameters(jobIDList, ['JobName'])
    self.assertTrue(result['OK'])
    names = set()
    for jobID in result['Value']:
        names.add(result['Value'][jobID]['JobName'])
    wanted = set()
    for nJob in range(3):
        wanted.add('parametric_helloWorld_%s' % nJob)
    self.assertEqual(names, wanted)

    # status update, one job at a time
    for jobID in jobIDList:
        result = updater.setJobStatus(jobID, 'Done', 'matching', 'source')
        self.assertTrue(result['OK'])

    # deletion of all jobs in one call
    result = client.deleteJob(jobIDList)
    self.assertTrue(result['OK'])

    # every job must now be flagged as deleted
    for jobID in jobIDList:
        result = watcher.getJobStatus(jobID)
        self.assertTrue(result['OK'])
        self.assertEqual(result['Value'], 'Deleted')
def test_ParametricChain(self):
    """Submit a parametric job, which should generate 3 actual jobs.

    The generated jobs are checked by name, moved to ``JobStatus.CHECKING``,
    deleted in bulk and finally verified to be in ``JobStatus.DELETED``.
    Every assertion carries the service message (or the offending value) so
    that a failure is self-explanatory.
    """
    wmsClient = WMSClient()
    jobStateUpdate = JobStateUpdateClient()
    jobMonitor = JobMonitoringClient()

    # create the job
    job = parametricJob()
    jobDescription = createFile(job)

    # submit the job
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res["OK"], res.get("Message"))
    jobIDList = res["Value"]
    self.assertEqual(len(jobIDList), 3, msg="Got %s" % str(jobIDList))

    # the job names must carry the parameter index
    res = jobMonitor.getJobsParameters(jobIDList, ["JobName"])
    self.assertTrue(res["OK"], res.get("Message"))
    jobNames = {parameters["JobName"] for parameters in res["Value"].values()}
    self.assertEqual(jobNames, {"parametric_helloWorld_%s" % nJob for nJob in range(3)})

    for jobID in jobIDList:
        res = jobStateUpdate.setJobStatus(jobID, JobStatus.CHECKING, "checking", "source")
        self.assertTrue(res["OK"], res.get("Message"))

    # bulk deletion (the former debug print of the result was dropped)
    res = wmsClient.deleteJob(jobIDList)
    self.assertTrue(res["OK"], res.get("Message"))

    for jobID in jobIDList:
        res = jobMonitor.getJobsStatus(jobID)
        self.assertTrue(res["OK"], res.get("Message"))
        self.assertEqual(res["Value"][jobID]["Status"], JobStatus.DELETED, msg="Got %s" % str(res["Value"]))
def test_FullChain(self):
    """Drive one job through submit, reset, reschedule, kill and delete.

    This test will

    - call the WMSClient methods submitJob, resetJob, rescheduleJob, killJob and deleteJob
    - use the JobMonitoring client to verify the resulting job status after each step
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job: the ID is returned both as 'Value' and as 'JobID'
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'])
    self.assertIsInstance(res['Value'], int)
    self.assertEqual(res['Value'], res['JobID'])
    jobID = res['Value']

    # updating the status (result intentionally not asserted, as before)
    jobStateUpdate.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source')

    # reset the job
    res = wmsClient.resetJob(jobID)
    self.assertTrue(res['OK'])

    # reschedule the job
    res = wmsClient.rescheduleJob(jobID)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Received')

    # updating the status again
    jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Killed')

    # updating the status once more
    jobStateUpdate.setJobStatus(jobID, 'Done', 'matching', 'source')

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Done')  # this time it won't kill... it's done!

    # delete the job - this will just set its status to "deleted"
    res = wmsClient.deleteJob(jobID)
    self.assertTrue(res['OK'])
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Deleted')
def test_JobStateUpdateAndJobMonitoring(self):
    """Exercise the JobStateUpdate setters and read the values back via JobMonitoring."""
    submitter = WMSClient()
    monitor = JobMonitoringClient()
    updater = JobStateUpdateClient()

    # a plain hello-world job is the test subject
    helloJob = helloWorldJob()
    xmlDescription = createFile(helloJob)

    # submission and a few first look-ups
    res = submitter.submitJob(helloJob._toJDL(xmlFile=xmlDescription))
    self.assertTrue(res['OK'])
    jobID = int(res['Value'])
    # jobID = res['JobID']

    for original in (True, False):
        res = monitor.getJobJDL(jobID, original)
        self.assertTrue(res['OK'])

    res = monitor.getJobsParameters([jobID], [])
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {})
    res = monitor.getJobsParameters([jobID], ['Owner'])
    self.assertTrue(res['OK'])

    # --- setters -----------------------------------------------------------
    res = updater.setJobStatus(jobID, 'Matched', 'matching', 'source')
    self.assertTrue(res['OK'])
    newParameters = [('par1', 'par1Value'), ('par2', 'par2Value')]
    res = updater.setJobParameters(jobID, newParameters)
    self.assertTrue(res['OK'])
    res = updater.setJobApplicationStatus(jobID, 'app status', 'source')
    self.assertTrue(res['OK'])
    # res = updater.setJobFlag()
    # self.assertTrue(res['OK'])
    # res = updater.unsetJobFlag()
    # self.assertTrue(res['OK'])
    res = updater.setJobSite(jobID, 'Site')
    self.assertTrue(res['OK'])
    # res = monitor.traceJobParameter( 'Site', 1, 'Status' )
    # self.assertTrue(res['OK'])

    # --- getters -----------------------------------------------------------
    res = monitor.getJobStatus(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Running')

    res = monitor.getJobParameter(jobID, 'par1')
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {'par1': 'par1Value'})

    res = monitor.getJobParameters(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], {'par1': 'par1Value', 'par2': 'par2Value'})

    res = monitor.getJobAttribute(jobID, 'Site')
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], 'Site')

    res = monitor.getJobAttributes(jobID)
    self.assertTrue(res['OK'])
    attributes = res['Value']
    self.assertEqual(attributes['ApplicationStatus'], 'app status')
    self.assertEqual(attributes['JobName'], 'helloWorld')

    res = monitor.getJobSummary(jobID)
    self.assertTrue(res['OK'])
    summary = res['Value']
    self.assertEqual(summary['ApplicationStatus'], 'app status')
    self.assertEqual(summary['Status'], 'Running')

    res = monitor.getJobHeartBeatData(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], [])

    res = monitor.getInputData(jobID)
    self.assertTrue(res['OK'])
    self.assertEqual(res['Value'], [])

    res = monitor.getJobPrimarySummary(jobID)
    self.assertTrue(res['OK'])
    res = monitor.getAtticJobParameters(jobID)
    self.assertTrue(res['OK'])

    # --- bulk status update, then re-check the summary ---------------------
    res = updater.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown')
    self.assertTrue(res['OK'])
    res = monitor.getJobSummary(jobID)
    self.assertTrue(res['OK'])
    summary = res['Value']
    self.assertEqual(summary['Status'], 'Done')
    self.assertEqual(summary['MinorStatus'], 'MinorStatus')
    self.assertEqual(summary['ApplicationStatus'], 'app status')

    res = updater.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'})
    self.assertTrue(res['OK'])

    # delete the job - this will just set its status to "deleted"
    submitter.deleteJob(jobID)
def finalizeRequest(self, requestID, jobID, useCertificates=True):
    """Check the request status and, if it is 'Done', finalize the corresponding job.

    The job's "PendingRequest" parameter is refreshed with the request digest and
    the job status / minor status is updated.

    :param self: self reference
    :param str requestID: request id
    :param int jobID: job id (any value accepted by ``int()``, e.g. a numeric string)
    :param bool useCertificates: forwarded to the JobStateUpdate and JobMonitoring clients
    :return: S_OK(newJobStatus), where newJobStatus is an empty string when only the
             minor status was updated; S_OK() when the job summary is empty;
             otherwise the failing service result or an S_ERROR
    """
    stateServer = JobStateUpdateClient(useCertificates=useCertificates)

    # Checking if to update the job status - we should fail here, so it will be re-tried later
    # Checking the state, first
    res = self.getRequestStatus(requestID)
    if not res["OK"]:
        self.log.error(
            "finalizeRequest: failed to get request", "request: %s status: %s" % (requestID, res["Message"])
        )
        return res
    if res["Value"] != "Done":
        return S_ERROR(
            "The request %s isn't 'Done' but '%s', this should never happen, why are we here?"
            % (requestID, res["Value"])
        )

    # The request is 'Done', let's update the job status. If we fail, we should re-try later
    monitorServer = JobMonitoringClient(useCertificates=useCertificates)

    # Normalise once: the "%d" log formats below raise TypeError on a numeric string,
    # while the monitoring calls were already casting with int()
    jobID = int(jobID)

    res = monitorServer.getJobSummary(jobID)
    if not res["OK"]:
        self.log.error("finalizeRequest: Failed to get job status", "JobID: %d" % jobID)
        return res
    if not res["Value"]:
        self.log.info("finalizeRequest: job %d does not exist (anymore): finalizing" % jobID)
        return S_OK()

    jobStatus = res["Value"]["Status"]
    jobMinorStatus = res["Value"]["MinorStatus"]
    jobAppStatus = ""
    newJobStatus = ""

    if jobStatus == JobStatus.STALLED:
        # If job is stalled, find the previous status from the logging info
        res = monitorServer.getJobLoggingInfo(jobID)
        if not res["OK"]:
            self.log.error("finalizeRequest: Failed to get job logging info", "JobID: %d" % jobID)
            return res
        # Check the last status was Stalled and get the one before
        if len(res["Value"]) >= 2 and res["Value"][-1][0] == JobStatus.STALLED:
            jobStatus, jobMinorStatus, jobAppStatus = res["Value"][-2][:3]
            newJobStatus = jobStatus

    # update the job pending request digest in any case since it is modified
    self.log.info("finalizeRequest: Updating request digest for job %d" % jobID)

    digest = self.getDigest(requestID)
    if digest["OK"]:
        digest = digest["Value"]
        self.log.verbose(digest)
        res = stateServer.setJobParameter(jobID, "PendingRequest", digest)
        if not res["OK"]:
            self.log.info("finalizeRequest: Failed to set job %d parameter: %s" % (jobID, res["Message"]))
            return res
    else:
        # a missing digest is only reported: the status update below still happens
        self.log.error(
            "finalizeRequest: Failed to get request digest for %s: %s" % (requestID, digest["Message"])
        )

    if jobStatus == JobStatus.COMPLETED:
        # What to do? Depends on what we have in the minorStatus
        if jobMinorStatus == JobMinorStatus.PENDING_REQUESTS:
            newJobStatus = JobStatus.DONE
        elif jobMinorStatus == JobMinorStatus.APP_ERRORS:
            newJobStatus = JobStatus.FAILED
        elif jobMinorStatus == JobMinorStatus.MARKED_FOR_TERMINATION:
            # If the job has been Killed, set it Killed
            newJobStatus = JobStatus.KILLED
        else:
            self.log.error(
                "finalizeRequest: Unexpected jobMinorStatus", "for %d (got %s)" % (jobID, jobMinorStatus)
            )
            return S_ERROR("Unexpected jobMinorStatus")

    if newJobStatus:
        self.log.info(
            "finalizeRequest: Updating job status",
            "for %d to '%s/%s'" % (jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE),
        )
    else:
        self.log.info(
            "finalizeRequest: Updating job minor status",
            "for %d to '%s' (current status is %s)" % (jobID, JobMinorStatus.REQUESTS_DONE, jobStatus),
        )

    # newJobStatus may be an empty string here; it is passed through unchanged
    stateUpdate = stateServer.setJobStatus(jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE, "RMS")
    if jobAppStatus and stateUpdate["OK"]:
        stateUpdate = stateServer.setJobApplicationStatus(jobID, jobAppStatus, "RMS")
    if not stateUpdate["OK"]:
        self.log.error(
            "finalizeRequest: Failed to set job status",
            "JobID: %d, error: %s" % (jobID, stateUpdate["Message"]),
        )
        return stateUpdate

    return S_OK(newJobStatus)
class TransformationInfo(object):
    """Hold information about a transformation."""

    def __init__(self, transformationID, transInfoDict, enabled, tClient, fcClient, jobMon):
        """Store clients etc.

        :param transformationID: ID of the transformation
        :param dict transInfoDict: must provide the keys 'TransformationName', 'Type',
                                   'AuthorDN' and 'AuthorGroup'
        :param bool enabled: when False, the status setters and cleanOutputs change nothing
        :param tClient: client used for transformation files and task status
        :param fcClient: client used to look up file descendants
        :param jobMon: client used to look up jobs
        """
        self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
        self.enabled = enabled
        self.tID = transformationID
        self.transName = transInfoDict['TransformationName']
        self.tClient = tClient
        self.jobMon = jobMon
        self.fcClient = fcClient
        self.transType = transInfoDict['Type']
        self.authorDN = transInfoDict['AuthorDN']
        self.authorGroup = transInfoDict['AuthorGroup']
        self.jobStateClient = JobStateUpdateClient()

    def checkTasksStatus(self):
        """Return the files of this transformation grouped by task.

        :returns: defaultdict mapping taskID to a list of dicts with the keys
                  FileID, LFN, Status and ErrorCount
        :raises: RuntimeError when the transformation files cannot be retrieved
        """
        res = self.tClient.getTransformationFiles(condDict={'TransformationID': self.tID})
        if not res['OK']:
            raise RuntimeError("Failed to get transformation tasks: %s" % res['Message'])

        tasksDict = defaultdict(list)
        for task in res['Value']:
            tasksDict[task['TaskID']].append(
                dict(FileID=task['FileID'], LFN=task['LFN'], Status=task['Status'], ErrorCount=task['ErrorCount'])
            )
        return tasksDict

    def setJobDone(self, job):
        """Set the task of the job to Done, and the job itself if it is not Done yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Done')
        if job.status != 'Done':
            self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

    def setJobFailed(self, job):
        """Set the task of the job to Failed, and the job itself if it is not Failed yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Failed')
        if job.status != 'Failed':
            self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

    def setInputUnused(self, job):
        """Set the input files to Unused."""
        self.__setInputStatus(job, 'Unused')

    def setInputMaxReset(self, job):
        """Set the input files to MaxReset."""
        self.__setInputStatus(job, "MaxReset")

    def setInputProcessed(self, job):
        """Set the input files to Processed."""
        self.__setInputStatus(job, "Processed")

    def setInputDeleted(self, job):
        """Set the input files to Deleted."""
        self.__setInputStatus(job, "Deleted")

    def __setInputStatus(self, job, status):
        """Set the input files of the job to status (no-op when not enabled).

        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return
        result = self.tClient.setFileStatusForTransformation(self.tID, status, job.inputFiles, force=True)
        if not result['OK']:
            # use the per-transformation sub-logger, like every other method of this class
            self.log.error("Failed updating status", result['Message'])
            raise RuntimeError("Failed updating file status")

    def __setTaskStatus(self, job, status):
        """Update the task in the TransformationDB.

        :raises: RuntimeError when the update fails
        """
        res = self.tClient.setTaskStatus(self.transName, job.taskID, status)
        if not res['OK']:
            raise RuntimeError("Failed updating task status: %s" % res['Message'])

    def __updateJobStatus(self, jobID, status, minorstatus=''):
        """Update the job status.

        :returns: result of setJobStatus, or S_OK('DisabledMode') when not enabled
        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return S_OK('DisabledMode')
        result = self.jobStateClient.setJobStatus(jobID, status, minorstatus, 'DataRecoveryAgent')
        if not result['OK']:
            self.log.error('Failed to update job status', result['Message'])
            raise RuntimeError('Failed to update job status')
        return result

    def __findAllDescendants(self, lfnList):
        """Find all descendants of a list of LFNs, looking at depths 1 to 7.

        :returns: flat list of descendants; empty when the lookup fails (best effort)
        """
        # NOTE(review): a real list is passed because a lazy range object may not be
        # serialisable by the client on Python 3 -- confirm
        result = self.fcClient.getFileDescendents(lfnList, list(range(1, 8)))
        if not result['OK']:
            # still best effort, but no longer silent
            self.log.error("Failed to get file descendants", result['Message'])
            return []
        allDescendants = []
        for descendants in result['Value']['Successful'].values():
            allDescendants.extend(descendants)
        return allDescendants

    def cleanOutputs(self, jobInfo):
        """Remove all job outputs for job represented by jobInfo object.

        Including removal of descendents, if defined. Only output files whose
        status is "Exists" are removed. When not enabled, the files are only listed.

        :raises: RuntimeError when no proxy can be obtained or the removal call fails
        """
        if not jobInfo.outputFiles:
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        existingOutputFiles = [
            lfn
            for lfn, status in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus)
            if status == "Exists"
        ]
        filesToDelete = existingOutputFiles + descendants
        if not filesToDelete:
            return

        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " % "\n +++ ".join(filesToDelete))
            return
        self.log.notice("Remove these files: \n +++ %s " % "\n +++ ".join(filesToDelete))

        errorReasons = defaultdict(list)
        successfullyRemoved = 0

        # the removal is done in chunks of 200 LFNs, each under the author's proxy
        for lfnList in breakListIntoChunks(filesToDelete, 200):
            with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
                result = DataManager().removeFile(lfnList)
                if not result['OK']:
                    self.log.error("Failed to remove LFNs", result['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" % result['Message'])
                for lfn, err in result['Value']['Failed'].items():
                    errorReasons[str(err)].append(lfn)
                successfullyRemoved += len(result['Value']['Successful'])

        for reason, lfns in errorReasons.items():
            self.log.error("Failed to remove %d files with error: %s" % (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % successfullyRemoved)

    def getJobs(self, statusList=None):
        """Get done and failed jobs.

        :param list statusList: optional list of status to find jobs; only 'Done' and
                                'Failed' are looked at, both are used when None
        :returns: 3-tuple of OrderedDict of JobInfo objects, keyed by jobID; number of
                  Done jobs; number of Failed jobs
        """
        done = S_OK([])
        failed = S_OK([])
        if statusList is None:
            statusList = ['Done', 'Failed']
        if 'Done' in statusList:
            self.log.notice("Getting 'Done' Jobs...")
            done = self.__getJobs(["Done"])
        if 'Failed' in statusList:
            self.log.notice("Getting 'Failed' Jobs...")
            failed = self.__getJobs(["Failed"])
        done = done['Value']
        failed = failed['Value']

        jobsUnsorted = {}
        for job in done:
            jobsUnsorted[int(job)] = JobInfo(job, "Done", self.tID, self.transType)
        for job in failed:
            jobsUnsorted[int(job)] = JobInfo(job, "Failed", self.tID, self.transType)
        jobs = OrderedDict(sorted(jobsUnsorted.items(), key=lambda t: t[0]))

        self.log.notice("Found %d Done Jobs " % len(done))
        self.log.notice("Found %d Failed Jobs " % len(failed))
        return jobs, len(done), len(failed)

    def __getJobs(self, status):
        """Return list of jobs with given status.

        :param list status: list of status to find
        :returns: S_OK with result
        :raises: RuntimeError when failing to find jobs
        """
        # the job group is the transformation ID, zero-padded to 8 digits
        attrDict = dict(Status=status, JobGroup='%08d' % int(self.tID))
        res = self.jobMon.getJobs(attrDict)
        if not res['OK']:
            self.log.error('Error finding jobs: ', res['Message'])
            raise RuntimeError('Failed to get jobs')
        self.log.debug('Found Trans jobs: %s' % res['Value'])
        return res
class TransformationInfo(object):
    """Hold information about a transformation."""

    def __init__(self, transformationID, transInfoDict, enabled, tClient, fcClient, jobMon):
        """Store clients etc.

        :param transformationID: ID of the transformation
        :param dict transInfoDict: must provide the keys 'TransformationName', 'Type',
                                   'AuthorDN' and 'AuthorGroup'
        :param bool enabled: when False, the status setters and cleanOutputs change nothing
        :param tClient: client used for transformation files and task status
        :param fcClient: client used to look up file descendants
        :param jobMon: client used to look up jobs
        """
        self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
        self.enabled = enabled
        self.tID = transformationID
        self.transName = transInfoDict['TransformationName']
        self.tClient = tClient
        self.jobMon = jobMon
        self.fcClient = fcClient
        self.transType = transInfoDict['Type']
        self.authorDN = transInfoDict['AuthorDN']
        self.authorGroup = transInfoDict['AuthorGroup']
        self.jobStateClient = JobStateUpdateClient()

    def checkTasksStatus(self):
        """Return the file information of this transformation keyed by task.

        :returns: dict mapping taskID to a dict with the keys FileID, LFN, Status, ErrorCount
        :raises: RuntimeError when the transformation files cannot be retrieved
        """
        res = self.tClient.getTransformationFiles(condDict={'TransformationID': self.tID})
        if not res['OK']:
            raise RuntimeError("Failed to get transformation tasks: %s" % res['Message'])

        tasksDict = {}
        for task in res['Value']:
            # NOTE(review): one entry per task -- if a task has several files only the
            # last one returned is kept; confirm tasks are single-file here
            tasksDict[task['TaskID']] = dict(
                FileID=task['FileID'], LFN=task['LFN'], Status=task['Status'], ErrorCount=task['ErrorCount']
            )
        return tasksDict

    def setJobDone(self, job):
        """Set the task of the job to Done, and the job itself if it is not Done yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Done')
        if job.status != 'Done':
            self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

    def setJobFailed(self, job):
        """Set the task of the job to Failed, and the job itself if it is not Failed yet."""
        if not self.enabled:
            return
        self.__setTaskStatus(job, 'Failed')
        if job.status != 'Failed':
            self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

    def setInputUnused(self, job):
        """Set the input file to Unused."""
        self.__setInputStatus(job, "Unused")

    def setInputMaxReset(self, job):
        """Set the input file to MaxReset."""
        self.__setInputStatus(job, "MaxReset")

    def setInputProcessed(self, job):
        """Set the input file to Processed."""
        self.__setInputStatus(job, "Processed")

    def setInputDeleted(self, job):
        """Set the input file to Deleted."""
        self.__setInputStatus(job, "Deleted")

    def __setInputStatus(self, job, status):
        """Set the single input file of the job to status (no-op when not enabled).

        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return
        result = self.tClient.setFileStatusForTransformation(self.tID, status, [job.inputFile], force=True)
        if not result['OK']:
            # use the per-transformation sub-logger, like every other method of this class
            self.log.error("Failed updating status", result['Message'])
            raise RuntimeError("Failed updating file status")

    def __setTaskStatus(self, job, status):
        """Update the task in the TransformationDB.

        :raises: RuntimeError when the update fails
        """
        res = self.tClient.setTaskStatus(self.transName, job.taskID, status)
        if not res['OK']:
            raise RuntimeError("Failed updating task status: %s" % res['Message'])

    def __updateJobStatus(self, jobID, status, minorstatus=''):
        """Update the job status.

        :returns: result of setJobStatus, or S_OK('DisabledMode') when not enabled
        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return S_OK('DisabledMode')
        result = self.jobStateClient.setJobStatus(jobID, status, minorstatus, 'DataRecoveryAgent')
        if not result['OK']:
            self.log.error('Failed to update job status', result['Message'])
            raise RuntimeError('Failed to update job status')
        return result

    def __findAllDescendants(self, lfnList):
        """Find all descendants of a list of LFNs, looking at depths 1 to 7.

        :returns: flat list of descendants; empty when the lookup fails (best effort)
        """
        # NOTE(review): a real list is passed because a lazy range object may not be
        # serialisable by the client on Python 3 -- confirm
        result = self.fcClient.getFileDescendents(lfnList, list(range(1, 8)))
        if not result['OK']:
            # still best effort, but no longer silent
            self.log.error("Failed to get file descendants", result['Message'])
            return []
        allDescendants = []
        for descendants in result['Value']['Successful'].values():
            allDescendants.extend(descendants)
        return allDescendants

    def cleanOutputs(self, jobInfo):
        """Remove all job outputs, including their descendants.

        Only output files whose status is "Exists" are removed. When not enabled,
        the files are only listed.

        :raises: RuntimeError when no proxy can be obtained or the removal call fails
        """
        if not jobInfo.outputFiles:
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        existingOutputFiles = [
            lfn
            for lfn, status in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus)
            if status == "Exists"
        ]
        filesToDelete = existingOutputFiles + descendants
        if not filesToDelete:
            return

        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " % "\n +++ ".join(filesToDelete))
            return
        self.log.notice("Remove these files: \n +++ %s " % "\n +++ ".join(filesToDelete))

        errorReasons = defaultdict(list)
        successfullyRemoved = 0

        # the removal is done in chunks of 200 LFNs, each under the author's proxy
        for lfnList in breakListIntoChunks(filesToDelete, 200):
            with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
                result = DataManager().removeFile(lfnList)
                if not result['OK']:
                    self.log.error("Failed to remove LFNs", result['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" % result['Message'])
                for lfn, err in result['Value']['Failed'].items():
                    errorReasons[str(err)].append(lfn)
                successfullyRemoved += len(result['Value']['Successful'])

        for reason, lfns in errorReasons.items():
            self.log.error("Failed to remove %d files with error: %s" % (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % successfullyRemoved)

    def getJobs(self, statusList=None):
        """Get done and failed jobs.

        :param list statusList: optional list of status to find jobs; only 'Done' and
                                'Failed' are looked at, both are used when None
        :returns: 3-tuple of OrderedDict of JobInfo objects, keyed by jobID; number of
                  Done jobs; number of Failed jobs
        """
        done = S_OK([])
        failed = S_OK([])
        if statusList is None:
            statusList = ['Done', 'Failed']
        if 'Done' in statusList:
            self.log.notice("Getting 'Done' Jobs...")
            done = self.__getJobs(["Done"])
        if 'Failed' in statusList:
            self.log.notice("Getting 'Failed' Jobs...")
            failed = self.__getJobs(["Failed"])
        done = done['Value']
        failed = failed['Value']

        jobsUnsorted = {}
        for job in done:
            jobsUnsorted[int(job)] = JobInfo(job, "Done", self.tID, self.transType)
        for job in failed:
            jobsUnsorted[int(job)] = JobInfo(job, "Failed", self.tID, self.transType)
        jobs = OrderedDict(sorted(jobsUnsorted.items(), key=lambda t: t[0]))

        self.log.notice("Found %d Done Jobs " % len(done))
        self.log.notice("Found %d Failed Jobs " % len(failed))
        return jobs, len(done), len(failed)

    def __getJobs(self, status):
        """Return the jobs of this transformation with the given status.

        :param list status: list of status to find
        :returns: S_OK with result
        :raises: RuntimeError when failing to find jobs
        """
        # the job group is the transformation ID, zero-padded to 8 digits
        attrDict = dict(Status=status, JobGroup="%08d" % int(self.tID))
        res = self.jobMon.getJobs(attrDict)
        if not res['OK']:
            self.log.error("Error finding jobs: ", res['Message'])
            raise RuntimeError("Failed to get jobs")
        self.log.debug("Found Prod jobs: %s" % res['Value'])
        return res
def test_FullChain(self):
    """Drive one job through submit, reset, reschedule, kill and delete.

    This test will

    - call the WMSClient methods submitJob, resetJob, rescheduleJob, killJob and deleteJob
    - use the JobMonitoring client to verify status, minor status and application
      status after the individual steps
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job: the ID is returned both as "Value" and as "JobID"
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertIsInstance(res["Value"], int, msg="Got %s" % type(res["Value"]))
    self.assertEqual(res["Value"], res["JobID"], msg="Got %s, expected %s" % (str(res["Value"]), res["JobID"]))
    jobID = res["Value"]

    # updating the status
    res = jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING, "Executing Minchiapp", "source")
    self.assertTrue(res["OK"], res.get("Message"))

    # reset the job
    res = wmsClient.resetJob(jobID)
    self.assertTrue(res["OK"], res.get("Message"))

    # reschedule the job
    res = wmsClient.rescheduleJob(jobID)
    self.assertTrue(res["OK"], res.get("Message"))

    # after rescheduling, the job is expected back in RECEIVED with a "Job Rescheduled" minor status
    res = jobMonitor.getJobsStatus(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(res["Value"][jobID]["Status"], JobStatus.RECEIVED, msg="Got %s" % str(res["Value"]))
    res = jobMonitor.getJobsMinorStatus([jobID])
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(res["Value"], {jobID: {"MinorStatus": "Job Rescheduled"}}, msg="Got %s" % str(res["Value"]))
    res = jobMonitor.getJobsApplicationStatus([jobID])
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(res["Value"], {jobID: {"ApplicationStatus": "Unknown"}}, msg="Got %s" % str(res["Value"]))
    res = jobMonitor.getJobsStates([jobID])
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(
        res["Value"],
        {
            jobID: {
                "Status": JobStatus.RECEIVED,
                "MinorStatus": "Job Rescheduled",
                "ApplicationStatus": "Unknown",
            }
        },
        msg="Got %s" % str(res["Value"]),
    )

    # updating the status again, one step at a time up to MATCHED
    res = jobStateUpdate.setJobStatus(jobID, JobStatus.CHECKING, "checking", "source")
    self.assertTrue(res["OK"], res.get("Message"))
    res = jobStateUpdate.setJobStatus(jobID, JobStatus.WAITING, "waiting", "source")
    self.assertTrue(res["OK"], res.get("Message"))
    res = jobStateUpdate.setJobStatus(jobID, JobStatus.MATCHED, "matched", "source")
    self.assertTrue(res["OK"], res.get("Message"))

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    res = jobMonitor.getJobsStatus(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(res["Value"][jobID]["Status"], JobStatus.KILLED, msg="Got %s" % str(res["Value"]))

    # delete the job - this will just set its status to "deleted"
    res = wmsClient.deleteJob(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    res = jobMonitor.getJobsStatus(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    self.assertEqual(res["Value"][jobID]["Status"], JobStatus.DELETED, msg="Got %s" % str(res["Value"]))
def test_JobStateUpdateAndJobMonitoring(self):
    """Exercise the JobStateUpdate setters and read the values back via JobMonitoring."""
    submitter = WMSClient()
    monitor = JobMonitoringClient()
    updater = JobStateUpdateClient()

    # a plain hello-world job is the test subject
    helloJob = helloWorldJob()
    xmlDescription = createFile(helloJob)

    # submission and a few first look-ups
    res = submitter.submitJob(helloJob._toJDL(xmlFile=xmlDescription))
    self.assertTrue(res["OK"], res.get("Message"))
    jobID = int(res["Value"])
    # jobID = res['JobID']

    for original in (True, False):
        res = monitor.getJobJDL(jobID, original)
        self.assertTrue(res["OK"], res.get("Message"))

    res = monitor.getJobsParameters([jobID], [])
    self.assertTrue(res["OK"], res.get("Message"))
    res = monitor.getJobOwner(jobID)
    self.assertTrue(res["OK"], res.get("Message"))

    # --- setters -----------------------------------------------------------
    # the trailing True forces the status update
    res = updater.setJobStatus(jobID, JobStatus.RUNNING, "running", "source", None, True)
    self.assertTrue(res["OK"], res.get("Message"))
    newParameters = [("par1", "par1Value"), ("par2", "par2Value")]
    res = updater.setJobParameters(jobID, newParameters)
    time.sleep(5)
    self.assertTrue(res["OK"], res.get("Message"))
    res = updater.setJobApplicationStatus(jobID, "app status", "source")
    self.assertTrue(res["OK"], res.get("Message"))
    # res = updater.setJobFlag()
    # self.assertTrue(res['OK'], res.get('Message'))
    # res = updater.unsetJobFlag()
    # self.assertTrue(res['OK'], res.get('Message'))
    res = updater.setJobSite(jobID, "Site")
    self.assertTrue(res["OK"], res.get("Message"))

    # --- getters -----------------------------------------------------------
    res = monitor.getJobsStatus(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value[jobID]["Status"], JobStatus.RUNNING, msg="Got %s" % str(value))

    res = monitor.getJobParameter(jobID, "par1")
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, {"par1": "par1Value"}, msg="Got %s" % str(value))

    res = monitor.getJobParameters(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, {jobID: {"par1": "par1Value", "par2": "par2Value"}}, msg="Got %s" % str(value))

    res = monitor.getJobParameters(jobID, "par1")
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, {jobID: {"par1": "par1Value"}}, msg="Got %s" % str(value))

    res = monitor.getJobAttribute(jobID, "Site")
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, "Site", msg="Got %s" % str(value))

    res = monitor.getJobAttributes(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    attributes = res["Value"]
    self.assertEqual(
        attributes["ApplicationStatus"], "app status", msg="Got %s" % str(attributes["ApplicationStatus"])
    )
    self.assertEqual(attributes["JobName"], "helloWorld", msg="Got %s" % str(attributes["JobName"]))

    res = monitor.getJobSummary(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    summary = res["Value"]
    self.assertEqual(
        summary["ApplicationStatus"], "app status", msg="Got %s" % str(summary["ApplicationStatus"])
    )
    self.assertEqual(summary["Status"], JobStatus.RUNNING, msg="Got %s" % str(summary["Status"]))

    res = monitor.getJobHeartBeatData(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, [], msg="Got %s" % str(value))

    res = monitor.getInputData(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    value = res["Value"]
    self.assertEqual(value, [], msg="Got %s" % str(value))

    res = monitor.getJobSummary(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    res = monitor.getAtticJobParameters(jobID)
    self.assertTrue(res["OK"], res.get("Message"))

    # --- final status update, then re-check the summary ----------------------
    res = updater.setJobStatus(jobID, JobStatus.DONE, "MinorStatus", "Unknown")
    self.assertTrue(res["OK"], res.get("Message"))
    res = monitor.getJobSummary(jobID)
    self.assertTrue(res["OK"], res.get("Message"))
    summary = res["Value"]
    self.assertEqual(summary["Status"], JobStatus.DONE, msg="Got %s" % str(summary["Status"]))
    self.assertEqual(summary["MinorStatus"], "MinorStatus", msg="Got %s" % str(summary["MinorStatus"]))
    self.assertEqual(
        summary["ApplicationStatus"], "app status", msg="Got %s" % str(summary["ApplicationStatus"])
    )

    res = updater.sendHeartBeat(jobID, {"bih": "bih"}, {"boh": "boh"})
    self.assertTrue(res["OK"], res.get("Message"))

    # delete the job - this will just set its status to "deleted"
    submitter.deleteJob(jobID)
def test_FullChain(self):
    """Drive one hello-world job through the WMSClient operations.

    The test:
      - submits a job and checks that the returned 'Value' is an int equal
        to the returned 'JobID'
      - resets and reschedules the job, expecting status 'Received'
      - sets the job to 'Matched' and kills it, expecting status 'Killed'
      - sets the job to 'Done' and kills it again, expecting the status to
        stay 'Done'
      - deletes the job, expecting status 'Deleted'

    JobMonitoringClient is used to read the status back after each step.
    Deleting only marks the job as 'Deleted'; this method does not itself
    invoke the JobCleaningAgent.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertIsInstance(res['Value'], int)
    self.assertEqual(res['Value'], res['JobID'])
    jobID = res['Value']

    # updating the status (the result is deliberately not asserted)
    jobStateUpdate.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source')

    # reset the job
    res = wmsClient.resetJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))

    # reschedule the job
    res = wmsClient.rescheduleJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Received')

    # updating the status again
    jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')

    # kill the job
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Killed')

    # updating the status once more
    jobStateUpdate.setJobStatus(jobID, 'Done', 'matching', 'source')

    # kill the job: the call still succeeds, but a 'Done' job stays 'Done'
    res = wmsClient.killJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Done')

    # delete the job - this will just set its status to "deleted"
    res = wmsClient.deleteJob(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Deleted')
def test_JobStateUpdateAndJobMonitoring(self):
    """Verify the JobStateUpdate and JobMonitoring client functions.

    A hello-world job is submitted; its status, parameters, application
    status and site are set through JobStateUpdateClient and read back
    through JobMonitoringClient. The job is then set to 'Done', a heartbeat
    is sent, and the job is deleted.

    Every ``assertTrue(res['OK'], ...)`` passes ``res.get('Message')`` so a
    failing call reports the returned message instead of a bare
    "False is not true".
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    # create a job and check stuff
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submitting the job. Checking few stuff
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    self.assertTrue(res['OK'], res.get('Message'))
    jobID = int(res['Value'])

    res = jobMonitor.getJobJDL(jobID, True)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobJDL(jobID, False)
    self.assertTrue(res['OK'], res.get('Message'))

    # an empty parameter-name list is expected to yield an empty dict
    res = jobMonitor.getJobsParameters([jobID], [])
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], {})
    res = jobMonitor.getJobsParameters([jobID], ['Owner'])
    self.assertTrue(res['OK'], res.get('Message'))

    # Adding stuff
    res = jobStateUpdate.setJobStatus(jobID, 'Matched', 'matching', 'source')
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobStateUpdate.setJobParameters(jobID, [('par1', 'par1Value'), ('par2', 'par2Value')])
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobStateUpdate.setJobApplicationStatus(jobID, 'app status', 'source')
    self.assertTrue(res['OK'], res.get('Message'))
    # NOTE: setJobFlag() / unsetJobFlag() are not exercised by this test.
    res = jobStateUpdate.setJobSite(jobID, 'Site')
    self.assertTrue(res['OK'], res.get('Message'))
    # NOTE: traceJobParameter() is not exercised by this test.

    # now checking few things
    # NOTE(review): the status was set to 'Matched' above, yet 'Running' is
    # expected here - presumably one of the later updates moves the job on;
    # confirm against the service implementation.
    res = jobMonitor.getJobStatus(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Running')

    res = jobMonitor.getJobParameter(jobID, 'par1')
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], {'par1': 'par1Value'})

    # getJobParameters results are keyed by job ID
    res = jobMonitor.getJobParameters(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], {jobID: {'par1': 'par1Value', 'par2': 'par2Value'}})
    res = jobMonitor.getJobParameters(jobID, 'par1')
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], {jobID: {'par1': 'par1Value'}})

    res = jobMonitor.getJobAttribute(jobID, 'Site')
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], 'Site')

    res = jobMonitor.getJobAttributes(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')
    self.assertEqual(res['Value']['JobName'], 'helloWorld')

    res = jobMonitor.getJobSummary(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')
    self.assertEqual(res['Value']['Status'], 'Running')

    # no heartbeat has been sent and no input data is checked for yet
    res = jobMonitor.getJobHeartBeatData(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], [])
    res = jobMonitor.getInputData(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value'], [])

    res = jobMonitor.getJobPrimarySummary(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getAtticJobParameters(jobID)
    self.assertTrue(res['OK'], res.get('Message'))

    # move the job to 'Done' through the bulk call and read the summary back
    res = jobStateUpdate.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown')
    self.assertTrue(res['OK'], res.get('Message'))
    res = jobMonitor.getJobSummary(jobID)
    self.assertTrue(res['OK'], res.get('Message'))
    self.assertEqual(res['Value']['Status'], 'Done')
    self.assertEqual(res['Value']['MinorStatus'], 'MinorStatus')
    self.assertEqual(res['Value']['ApplicationStatus'], 'app status')

    res = jobStateUpdate.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'})
    self.assertTrue(res['OK'], res.get('Message'))

    # delete the job - this will just set its status to "deleted"
    # (cleanup only: the result is deliberately not asserted)
    wmsClient.deleteJob(jobID)