class JobManagerHandler(RequestHandler): @classmethod def initializeHandler(cls, serviceInfoDict): cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") cls.__connectToOptMind() gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind) return S_OK() @classmethod def __connectToOptMind(cls): if not cls.msgClient.connected: result = cls.msgClient.connect(JobManager=True) if not result['OK']: cls.log.warn("Cannot connect to OptimizationMind!", result['Message']) def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict['properties'] self.owner = credDict['username'] self.peerUsesLimitedProxy = credDict['isLimitedProxy'] self.diracSetup = self.serviceInfoDict['clientSetup'] self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.setJobDB(gJobDB) return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: return result = self.msgClient.createMessage("OptimizeJobs") if not result['OK']: self.log.error("Cannot create Optimize message: %s" % result['Message']) return msgObj = result['Value'] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result['OK']: self.log.error("Cannot send Optimize message: %s" % result['Message']) return self.log.info("Optimize msg sent for %s jobs" % len(jids)) ########################################################################### types_submitJob = [StringTypes] def export_submitJob(self, jobDesc): """ Submit a single job to DIRAC WMS """ if self.peerUsesLimitedProxy: return S_ERROR("Can't submit using a limited proxy! (bad boy!)") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR('Failed to get job policies') policyDict = result['Value'] if not policyDict[RIGHT_SUBMIT]: return S_ERROR('Job submission not authorized') #jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) nParameters = getNumberOfParameters(jobClassAd) parametricJob = False if nParameters > 0: parametricJob = True result = generateParametricJobs(jobClassAd) if not result['OK']: return result jobDescList = result['Value'] else: jobDescList = [jobDesc] jobIDList = [] for jobDescription in jobDescList: result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup) if not result['OK']: return result jobID = result['JobID'] gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager') jobIDList.append(jobID) #Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if 'Value' not in retVal or not retVal['Value']: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result['JobID'] = result['Value'] result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(jobIDList) return result ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result['OK']: gLogger.error("Can't check if the user has proxy uploaded:", result['Message']) return True #Check if an upload is required return result['Value'] == False ########################################################################### types_invalidateJob = [IntType] def invalidateJob(self, jobID): """ Make job with jobID invalid, e.g. because of the sandbox submission errors. """ pass ########################################################################### def __get_job_list(self, jobInput): """ Evaluate the jobInput into a list of ints """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, basestring): try: ijob = int(jobInput) return [ijob] except: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESCHEDULE) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result['OK']: return result gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed reschedule') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result def __deleteJob(self, jobID): """ Delete one job """ result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting') if not result['OK']: return result result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') return S_OK() def __killJob(self, jobID, sendKillCommand=True): """ Kill one job """ if sendKillCommand: result = gJobDB.setJobCommand(jobID, 'Kill') if not result['OK']: return result gLogger.info('Job %d is marked for termination' % jobID) result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination') if not result['OK']: gLogger.warn('Failed to set job Killed status') result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') return S_OK() def __kill_delete_jobs(self, jobIDList, right): """ Kill or delete jobs as necessary """ jobList = self.__get_job_list(jobIDList) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, right) # Get job status to see what is to be killed or deleted result = gJobDB.getAttributesForJobList(validJobList, ['Status']) if not result['OK']: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result['Value'].items(): if sDict['Status'] in ['Running', 'Matched', 'Stalled']: killJobList.append(jobID) elif sDict['Status'] in ['Done', 'Failed']: if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict['Status'] in ['Staging']: stagingJobList.append(jobID) bad_ids = [] for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result['OK']: bad_ids.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result['OK']: bad_ids.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result['OK']: bad_ids.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() gLogger.info('Going to send killing signal to stager as well!') result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result['OK']: gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message']) if nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed deletion') if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result['InvalidJobIDs'] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESET) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1) if not result['OK']: bad_ids.append(jobID) else: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result['OK']: bad_ids.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') self.__sendJobsToOptimizationMind(good_ids) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed resetting') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK() result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler(RequestHandler): @classmethod def initializeHandler(cls, serviceInfoDict): cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") cls.__connectToOptMind() gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind) return S_OK() @classmethod def __connectToOptMind(cls): if not cls.msgClient.connected: result = cls.msgClient.connect(JobManager=True) if not result['OK']: cls.log.warn("Cannot connect to OptimizationMind!", result['Message']) def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict['properties'] self.owner = credDict['username'] self.peerUsesLimitedProxy = credDict['isLimitedProxy'] self.diracSetup = self.serviceInfoDict['clientSetup'] self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.setJobDB(gJobDB) return S_OK() def __sendNewJobsToMind(self, jids): if not self.msgClient.connected: return result = self.msgClient.createMessage("OptimizeJobs") if not result['OK']: self.log.error("Cannot create Optimize message: %s" % result['Message']) return msgObj = result['Value'] msgObj.jids = jids result = self.msgClient.sendMessage(msgObj) if not result['OK']: self.log.error("Cannot send Optimize message: %s" % result['Message']) return self.log.info("Optimize msg sent for %s jobs" % len(jids)) ########################################################################### types_submitJob = [StringType] def export_submitJob(self, jobDesc): """ Submit a single job to DIRAC WMS """ if self.peerUsesLimitedProxy: return S_ERROR("Can't submit using a limited proxy! (bad boy!)") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR('Failed to get job policies') policyDict = result['Value'] if not policyDict[RIGHT_SUBMIT]: return S_ERROR('Job submission not authorized') #jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parameteric one jobClassAd = ClassAd(jobDesc) parametricJob = False if jobClassAd.lookupAttribute('Parameters'): parametricJob = True if jobClassAd.isAttributeList('Parameters'): parameterList = jobClassAd.getListFromExpression('Parameters') else: pStep = 0 pFactor = 1 pStart = 1 nParameters = jobClassAd.getAttributeInt('Parameters') if not nParameters: value = jobClassAd.get_expression('Parameters') return S_ERROR( 'Illegal value for Parameters JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterStart'): value = jobClassAd.get_expression( 'ParameterStart').replace('"', '') try: pStart = int(value) except: try: pStart = float(value) except: return S_ERROR( 'Illegal value for ParameterStart JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterStep'): pStep = jobClassAd.getAttributeInt('ParameterStep') if not pStep: pStep = jobClassAd.getAttributeFloat('ParameterStep') if not pStep: value = jobClassAd.get_expression('ParameterStep') return S_ERROR( 'Illegal value for ParameterStep JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterFactor'): pFactor = jobClassAd.getAttributeInt('ParameterFactor') if not pFactor: pFactor = jobClassAd.getAttributeFloat( 'ParameterFactor') if not pFactor: value = jobClassAd.get_expression( 'ParameterFactor') return S_ERROR( 'Illegal value for ParameterFactor JDL field: %s' % value) parameterList = list() parameterList.append(pStart) for i in range(nParameters - 1): parameterList.append(parameterList[i] * pFactor + pStep) if len(parameterList) > self.maxParametricJobs: return S_ERROR( 'The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs) jobDescList = [] nParam = len(parameterList) - 1 for n, p in enumerate(parameterList): newJobDesc = jobDesc.replace('%s', str(p)).replace( '%n', str(n).zfill(len(str(nParam)))) newClassAd = ClassAd(newJobDesc) for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']: newClassAd.deleteAttribute(attr) if type(p) == type(' ') and p.startswith('{'): newClassAd.insertAttributeInt('Parameter', str(p)) else: newClassAd.insertAttributeString('Parameter', str(p)) newClassAd.insertAttributeInt('ParameterNumber', n) newJDL = newClassAd.asJDL() jobDescList.append(newJDL) else: jobDescList = [jobDesc] jobIDList = [] for jobDescription in jobDescList: result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup) if not result['OK']: return result jobID = result['JobID'] gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager') jobIDList.append(jobID) #Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if 'Value' not in retVal or not retVal['Value']: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result['JobID'] = result['Value'] result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired() self.__sendNewJobsToMind(jobIDList) return result ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result['OK']: gLogger.error("Can't check if the user has proxy uploaded:", result['Message']) return True #Check if an upload is required return result['Value'] == False ########################################################################### types_invalidateJob = [IntType] def invalidateJob(self, jobID): """ Make job with jobID invalid, e.g. because of the sandbox submission errors. """ pass ########################################################################### def __get_job_list(self, jobInput): """ Evaluate the jobInput into a list of ints """ if type(jobInput) == IntType: return [jobInput] if type(jobInput) == StringType: try: ijob = int(jobInput) return [ijob] except: return [] if type(jobInput) == ListType: try: ljob = [int(x) for x in jobInput] return ljob except: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESCHEDULE) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result['OK']: return result gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed deletion') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendNewJobsToMind(validJobList) return result def __deleteJob(self, jobID): """ Delete one job """ result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting') if not result['OK']: return result result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') return S_OK() def __killJob(self, jobID): """ Kill one job """ result = gJobDB.setJobCommand(jobID, 'Kill') if not result['OK']: return result else: gLogger.info('Job %d is marked for termination' % jobID) result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination') if not result['OK']: gLogger.warn('Failed to set job Killed status') result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') return S_OK() def __kill_delete_jobs(self, jobIDList, right): """ Kill or delete jobs as necessary """ jobList = self.__get_job_list(jobIDList) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, right) # Get job status to see what is to be killed or deleted result = gJobDB.getAttributesForJobList(validJobList, ['Status']) if not result['OK']: return result killJobList = [] deleteJobList = [] for jobID, sDict in result['Value'].items(): if sDict['Status'] in ['Running', 'Matched', 'Stalled']: killJobList.append(jobID) elif sDict['Status'] in ['Done', 'Failed']: if not right == RIGHT_KILL: deleteJobList.append(jobID) else: deleteJobList.append(jobID) bad_ids = [] for jobID in killJobList: result = self.__killJob(jobID) if not result['OK']: bad_ids.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result['OK']: bad_ids.append(jobID) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed deletion') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESET) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1) if not result['OK']: bad_ids.append(jobID) else: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result['OK']: bad_ids.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') self.__sendNewJobsToMind(good_ids) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed resetting') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK() result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler( RequestHandler ): @classmethod def initializeHandler( cls, serviceInfoDict ): cls.msgClient = MessageClient( "WorkloadManagement/OptimizationMind" ) result = cls.msgClient.connect( JobManager = True ) if not result[ 'OK' ]: cls.log.error( "Cannot connect to OptimizationMind!", result[ 'Message' ] ) return result def initialize( self ): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict[ 'properties' ] self.owner = credDict[ 'username' ] self.peerUsesLimitedProxy = credDict[ 'isLimitedProxy' ] self.diracSetup = self.serviceInfoDict['clientSetup'] self.maxParametricJobs = self.srv_getCSOption( 'MaxParametricJobs', MAX_PARAMETRIC_JOBS ) self.jobPolicy = JobPolicy( self.ownerDN, self.ownerGroup, self.userProperties ) return S_OK() def __sendNewJobsToMind( self, jids ): result = self.msgClient.createMessage( "OptimizeJobs" ) if not result[ 'OK' ]: self.log.error( "Cannot create Optimize message: %s" % result[ 'Message' ] ) return msgObj = result[ 'Value' ] msgObj.jids = jids result = self.msgClient.sendMessage( msgObj ) if not result[ 'OK' ]: self.log.error( "Cannot send Optimize message: %s" % result[ 'Message' ] ) return ########################################################################### types_submitJob = [ StringType ] def export_submitJob( self, jobDesc ): """ Submit a single job to DIRAC WMS """ if self.peerUsesLimitedProxy: return S_ERROR( "Can't submit using a limited proxy! (bad boy!)" ) # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR( 'Failed to get job policies' ) policyDict = result['Value'] if not policyDict[ RIGHT_SUBMIT ]: return S_ERROR( 'Job submission not authorized' ) #jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parameteric one jobClassAd = ClassAd( jobDesc ) parametricJob = False if jobClassAd.lookupAttribute( 'Parameters' ): parametricJob = True if jobClassAd.isAttributeList( 'Parameters' ): parameterList = jobClassAd.getListFromExpression( 'Parameters' ) else: pStep = 0 pFactor = 1 pStart = 1 nParameters = jobClassAd.getAttributeInt( 'Parameters' ) if not nParameters: value = jobClassAd.get_expression( 'Parameters' ) return S_ERROR( 'Illegal value for Parameters JDL field: %s' % value ) if jobClassAd.lookupAttribute( 'ParameterStart' ): value = jobClassAd.get_expression( 'ParameterStart' ).replace( '"', '' ) try: pStart = int( value ) except: try: pStart = float( value ) except: return S_ERROR( 'Illegal value for ParameterStart JDL field: %s' % value ) if jobClassAd.lookupAttribute( 'ParameterStep' ): pStep = jobClassAd.getAttributeInt( 'ParameterStep' ) if not pStep: pStep = jobClassAd.getAttributeFloat( 'ParameterStep' ) if not pStep: value = jobClassAd.get_expression( 'ParameterStep' ) return S_ERROR( 'Illegal value for ParameterStep JDL field: %s' % value ) if jobClassAd.lookupAttribute( 'ParameterFactor' ): pFactor = jobClassAd.getAttributeInt( 'ParameterFactor' ) if not pFactor: pFactor = jobClassAd.getAttributeFloat( 'ParameterFactor' ) if not pFactor: value = jobClassAd.get_expression( 'ParameterFactor' ) return S_ERROR( 'Illegal value for ParameterFactor JDL field: %s' % value ) parameterList = list() parameterList.append( pStart ) for i in range( nParameters - 1 ): parameterList.append( parameterList[i] * pFactor + pStep ) if len( parameterList ) > self.maxParametricJobs: return S_ERROR( 'The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs ) jobDescList = [] nParam = len(parameterList) - 1 for n,p in enumerate(parameterList): newJobDesc = jobDesc.replace('%s',str(p)).replace('%n',str(n).zfill(len(str(nParam)))) newClassAd = ClassAd(newJobDesc) for attr in ['Parameters','ParameterStep','ParameterFactor']: newClassAd.deleteAttribute(attr) if type( p ) == type ( ' ' ) and p.startswith('{'): newClassAd.insertAttributeInt( 'Parameter',str(p) ) else: newClassAd.insertAttributeString( 'Parameter', str( p ) ) newClassAd.insertAttributeInt( 'ParameterNumber', n ) newJDL = newClassAd.asJDL() jobDescList.append( newJDL ) else: jobDescList = [ jobDesc ] jobIDList = [] for jobDescription in jobDescList: result = gJobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup ) if not result['OK']: return result jobID = result['JobID'] gLogger.info( 'Job %s added to the JobDB for %s/%s' % ( jobID, self.ownerDN, self.ownerGroup ) ) gJobLoggingDB.addLoggingRecord( jobID, result['Status'], result['MinorStatus'], source = 'JobManager' ) jobIDList.append( jobID ) #Set persistency flag retVal = gProxyManager.getUserPersistence( self.ownerDN, self.ownerGroup ) if 'Value' not in retVal or not retVal[ 'Value' ]: gProxyManager.setPersistency( self.ownerDN, self.ownerGroup, True ) if parametricJob: result = S_OK( jobIDList ) else: result = S_OK( jobIDList[0] ) result['JobID'] = result['Value'] result[ 'requireProxyUpload' ] = self.__checkIfProxyUploadIsRequired() self.__sendNewJobsToMind( jobIDList ) return result ########################################################################### def __checkIfProxyUploadIsRequired( self ): result = gProxyManager.userHasProxy( self.ownerDN, self.ownerGroup, validSeconds = 18000 ) if not result[ 'OK' ]: gLogger.error( "Can't check if the user has proxy uploaded:", result[ 'Message' ] ) return True #Check if an upload is required return result[ 'Value' ] == False ########################################################################### types_invalidateJob = [ IntType ] def invalidateJob( self, jobID ): """ Make job with jobID invalid, e.g. because of the sandbox submission errors. """ pass ########################################################################### def __get_job_list( self, jobInput ): """ Evaluate the jobInput into a list of ints """ if type( jobInput ) == IntType: return [jobInput] if type( jobInput ) == StringType: try: ijob = int( jobInput ) return [ijob] except: return [] if type( jobInput ) == ListType: try: ljob = [ int( x ) for x in jobInput ] return ljob except: return [] return [] ########################################################################### def __evaluate_rights( self, jobList, right ): """ Get access rights to jobID for the user ownerDN/ownerGroup """ self.jobPolicy.setJobDB( gJobDB ) validJobList = [] invalidJobList = [] nonauthJobList = [] ownerJobList = [] for jobID in jobList: result = self.jobPolicy.getUserRightsForJob( jobID ) if result['OK']: if result['Value'][right]: validJobList.append( jobID ) else: nonauthJobList.append( jobID ) if result[ 'UserIsOwner' ]: ownerJobList.append( jobID ) else: invalidJobList.append( jobID ) return validJobList, invalidJobList, nonauthJobList, ownerJobList ########################################################################### types_rescheduleJob = [ ] def export_rescheduleJob( self, jobIDs ): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository """ jobList = self.__get_job_list( jobIDs ) if not jobList: return S_ERROR( 'Invalid job specification: ' + str( jobIDs ) ) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_RESCHEDULE ) for jobID in validJobList: gtaskQueueDB.deleteJob( jobID ) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob( jobID ) gLogger.debug( str( result ) ) if not result['OK']: return result gJobLoggingDB.addLoggingRecord( result['JobID'], result['Status'], result['MinorStatus'], application = 'Unknown', source = 'JobManager' ) if invalidJobList or nonauthJobList: result = S_ERROR( 'Some jobs failed deletion' ) if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK( validJobList ) result[ 'requireProxyUpload' ] = len( ownerJobList ) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendNewJobsToMind( validJobList ) return result ########################################################################### types_deleteJob = [ ] def export_deleteJob( self, jobIDs ): """ Delete jobs specified in the jobIDs list """ jobList = self.__get_job_list( jobIDs ) if not jobList: return S_ERROR( 'Invalid job specification: ' + str( jobIDs ) ) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_DELETE ) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobStatus( jobID, 'Deleted', 'Checking accounting' ) if not result['OK']: bad_ids.append( jobID ) else: good_ids.append( jobID ) #result = gJobDB.deleteJobFromQueue(jobID) #if not result['OK']: # gLogger.warn('Failed to delete job from the TaskQueue (old)') result = gtaskQueueDB.deleteJob( jobID ) if not result['OK']: gLogger.warn( 'Failed to delete job from the TaskQueue' ) if invalidJobList or nonauthJobList: result = S_ERROR( 'Some jobs failed deletion' ) if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK( validJobList ) result[ 'requireProxyUpload' ] = len( ownerJobList ) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_killJob = [ ] def export_killJob( self, jobIDs ): """ Kill jobs specified in the jobIDs list """ jobList = self.__get_job_list( jobIDs ) if not jobList: return S_ERROR( 'Invalid job specification: ' + str( jobIDs ) ) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_KILL ) bad_ids = [] good_ids = [] for jobID in validJobList: # kill jobID result = gJobDB.setJobCommand( jobID, 'Kill' ) if not result['OK']: bad_ids.append( jobID ) else: gLogger.info( 'Job %d is marked for termination' % jobID ) good_ids.append( jobID ) result = gJobDB.setJobStatus( jobID, 'Killed', 'Marked for termination' ) if not result['OK']: gLogger.warn( 'Failed to set job status' ) #result = gJobDB.deleteJobFromQueue(jobID) #if not result['OK']: # gLogger.warn('Failed to delete job from the TaskQueue (old)') result = gtaskQueueDB.deleteJob( jobID ) if not result['OK']: gLogger.warn( 'Failed to delete job from the TaskQueue' ) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR( 'Some jobs failed deletion' ) if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK( validJobList ) result[ 'requireProxyUpload' ] = len( ownerJobList ) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_resetJob = [ ] def export_resetJob( self, jobIDs ): """ Reset jobs specified in the jobIDs list """ jobList = self.__get_job_list( jobIDs ) if not jobList: return S_ERROR( 'Invalid job specification: ' + str( jobIDs ) ) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_RESET ) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute( jobID, 'RescheduleCounter', -1 ) if not result['OK']: bad_ids.append( jobID ) else: gtaskQueueDB.deleteJob( jobID ) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob( jobID ) if not result['OK']: bad_ids.append( jobID ) else: good_ids.append( jobID ) gJobLoggingDB.addLoggingRecord( result['JobID'], result['Status'], result['MinorStatus'], application = 'Unknown', source = 'JobManager' ) self.__sendNewJobsToMind( good_ids ) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR( 'Some jobs failed resetting' ) if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK() result[ 'requireProxyUpload' ] = len( ownerJobList ) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler(RequestHandler): """ RequestHandler implementation of the JobManager """ @classmethod def initializeHandler(cls, serviceInfoDict): cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") cls.__connectToOptMind() gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind) return S_OK() @classmethod def __connectToOptMind(cls): if not cls.msgClient.connected: result = cls.msgClient.connect(JobManager=True) if not result['OK']: cls.log.warn("Cannot connect to OptimizationMind!", result['Message']) def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict['properties'] self.owner = credDict['username'] self.peerUsesLimitedProxy = credDict['isLimitedProxy'] self.diracSetup = self.serviceInfoDict['clientSetup'] self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.setJobDB(gJobDB) return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: return result = self.msgClient.createMessage("OptimizeJobs") if not result['OK']: self.log.error("Cannot create Optimize message: %s" % result['Message']) return msgObj = result['Value'] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result['OK']: self.log.error("Cannot send Optimize message: %s" % result['Message']) return self.log.info("Optimize msg sent for %s jobs" % len(jids)) ########################################################################### types_submitJob = [basestring] def export_submitJob(self, jobDesc): """ Submit a job to DIRAC WMS. The job can be a single job, or a parametric job. If it is a parametric job, then the parameters will need to be unpacked. :param str jobDesc: job description JDL (of a single or parametric job) :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK. """ if self.peerUsesLimitedProxy: return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR(EWMSSUBM, 'Failed to get job policies') policyDict = result['Value'] if not policyDict[RIGHT_SUBMIT]: return S_ERROR(EWMSSUBM, 'Job submission not authorized') # jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) result = getParameterVectorLength(jobClassAd) if not result['OK']: return result nJobs = result['Value'] parametricJob = False if nJobs > 0: # if we are here, then jobDesc was the description of a parametric job. So we start unpacking parametricJob = True if nJobs > self.maxParametricJobs: return S_ERROR(EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs) result = generateParametricJobs(jobClassAd) if not result['OK']: return result jobDescList = result['Value'] else: # if we are here, then jobDesc was the description of a single job. jobDescList = [jobDesc] jobIDList = [] if parametricJob: initialStatus = 'Submitting' initialMinorStatus = 'Bulk transaction confirmation' else: initialStatus = 'Received' initialMinorStatus = 'Job accepted' for jobDescription in jobDescList: # jobDescList because there might be a list generated by a parametric job result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup, initialStatus=initialStatus, initialMinorStatus=initialMinorStatus) if not result['OK']: return result jobID = result['JobID'] gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager') jobIDList.append(jobID) # Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if 'Value' not in retVal or not retVal['Value']: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result['JobID'] = result['Value'] result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_confirmBulkSubmission = [list] def export_confirmBulkSubmission(self, jobIDs): """ Confirm the possibility to proceed with processing of the jobs specified by the jobIDList :param jobIDList: list of job IDs :return: confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs)) validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_SUBMIT) # Check that all the requested jobs are eligible if set(jobList) != set(validJobList): return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus']) if not result['OK']: return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') jobStatusDict = result['Value'] # Check if the jobs are already activated jobEnabledList = [jobID for jobID in jobList if jobStatusDict[jobID]['Status'] in ["Received", "Checking", "Waiting", "Matched", "Running"]] if set(jobEnabledList) == set(jobList): return S_OK(jobList) # Check that requested job are in Submitting status jobUpdateStatusList = list(jobID for jobID in jobList if jobStatusDict[jobID]['Status'] == "Submitting") if set(jobUpdateStatusList) != set(jobList): return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') # Update status of all the requested jobs in one transaction result = gJobDB.setJobAttributes(jobUpdateStatusList, ['Status', 'MinorStatus'], ['Received', 'Job accepted']) if not result['OK']: return result self.__sendJobsToOptimizationMind(jobUpdateStatusList) return S_OK(jobUpdateStatusList) ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result['OK']: gLogger.error("Can't check if the user has proxy uploaded:", result['Message']) return True # Check if an upload is required return not result['Value'] ########################################################################### def __getJobList(self, jobInput): """ Evaluate the jobInput into a list of ints :param jobInput: one or more job IDs in int or str form :type jobInput: str or int or list :return : a list of int job IDs """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, basestring): try: ijob = int(jobInput) return [ijob] except BaseException: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except BaseException: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository :param jobIDList: list of job IDs :return: confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result['OK']: return result gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed reschedule') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK(validJobList) result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result def __deleteJob(self, jobID): """ Delete one job """ result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting') if not result['OK']: return result result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') # if it was the last job for the pilot, clear PilotsLogging about it result = gPilotAgentsDB.getPilotsForJobID(jobID) if not result['OK']: gLogger.error("Failed to get Pilots for JobID", result['Message']) return result for pilot in result['Value']: res = gPilotAgentsDB.getJobsForPilot(pilot['PilotID']) if not res['OK']: gLogger.error("Failed to get jobs for pilot", res['Message']) return res if not res['Value']: # if list of jobs for pilot is empty, delete pilot and pilotslogging result = gPilotAgentsDB.getPilotInfo(pilotID=pilot['PilotID']) if not result['OK']: gLogger.error("Failed to get pilot info", result['Message']) return result pilotRef = result[0]['PilotJobReference'] ret = gPilotAgentsDB.deletePilot(pilot['PilotID']) if not ret['OK']: gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message']) return ret if enablePilotsLogging: ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef) if not ret['OK']: gLogger.error("Failed to delete pilot logging from PilotAgentsDB", ret['Message']) return ret return S_OK() def __killJob(self, jobID, sendKillCommand=True): """ Kill one job """ if sendKillCommand: result = gJobDB.setJobCommand(jobID, 'Kill') if not result['OK']: return result gLogger.info('Job %d is marked for termination' % jobID) result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination') if not result['OK']: gLogger.warn('Failed to set job Killed status', result['Message']) result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue', result['Message']) return S_OK() def __kill_delete_jobs(self, jobIDList, right): """ Kill or delete jobs as necessary """ jobList = self.__getJobList(jobIDList) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right) # Get job status to see what is to be killed or deleted result = gJobDB.getAttributesForJobList(validJobList, ['Status']) if not result['OK']: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result['Value'].items(): if sDict['Status'] in ['Running', 'Matched', 'Stalled']: killJobList.append(jobID) elif sDict['Status'] in ['Done', 'Failed', 'Killed']: if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict['Status'] in ['Staging']: stagingJobList.append(jobID) badIDs = [] for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result['OK']: badIDs.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result['OK']: badIDs.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result['OK']: badIDs.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() gLogger.info('Going to send killing signal to stager as well!') result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result['OK']: gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message']) if nonauthJobList or badIDs: result = S_ERROR('Some jobs failed deletion') if nonauthJobList: gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList)) result['NonauthorizedJobIDs'] = nonauthJobList if badIDs: gLogger.warn("JobIDs failed to be deleted", str(badIDs)) result['FailedJobIDs'] = badIDs return result result = S_OK(validJobList) result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result['InvalidJobIDs'] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET) badIDs = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1) if not result['OK']: badIDs.append(jobID) else: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result['OK']: badIDs.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') self.__sendJobsToOptimizationMind(good_ids) if invalidJobList or nonauthJobList or badIDs: result = S_ERROR('Some jobs failed resetting') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if badIDs: result['FailedJobIDs'] = badIDs return result result = S_OK() result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler(RequestHandler): @classmethod def initializeHandler(cls, serviceInfoDict): cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") cls.__connectToOptMind() gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind) return S_OK() @classmethod def __connectToOptMind(cls): if not cls.msgClient.connected: result = cls.msgClient.connect(JobManager=True) if not result["OK"]: cls.log.warn("Cannot connect to OptimizationMind!", result["Message"]) def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict["DN"] self.ownerGroup = credDict["group"] self.userProperties = credDict["properties"] self.owner = credDict["username"] self.peerUsesLimitedProxy = credDict["isLimitedProxy"] self.diracSetup = self.serviceInfoDict["clientSetup"] self.maxParametricJobs = self.srv_getCSOption("MaxParametricJobs", MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.setJobDB(gJobDB) return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: return result = self.msgClient.createMessage("OptimizeJobs") if not result["OK"]: self.log.error("Cannot create Optimize message: %s" % result["Message"]) return msgObj = result["Value"] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result["OK"]: self.log.error("Cannot send Optimize message: %s" % result["Message"]) return self.log.info("Optimize msg sent for %s jobs" % len(jids)) ########################################################################### types_submitJob = [StringTypes] def export_submitJob(self, jobDesc): """ Submit a single job to DIRAC WMS """ if self.peerUsesLimitedProxy: return S_ERROR("Can't submit using a limited proxy! (bad boy!)") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result["OK"]: return S_ERROR("Failed to get job policies") policyDict = result["Value"] if not policyDict[RIGHT_SUBMIT]: return S_ERROR("Job submission not authorized") # jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) nParameters = getNumberOfParameters(jobClassAd) parametricJob = False if nParameters > 0: parametricJob = True result = generateParametricJobs(jobClassAd) if not result["OK"]: return result jobDescList = result["Value"] else: jobDescList = [jobDesc] jobIDList = [] for jobDescription in jobDescList: result = gJobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup ) if not result["OK"]: return result jobID = result["JobID"] gLogger.info("Job %s added to the JobDB for %s/%s" % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result["Status"], result["MinorStatus"], source="JobManager") jobIDList.append(jobID) # Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if "Value" not in retVal or not retVal["Value"]: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result["JobID"] = result["Value"] result["requireProxyUpload"] = self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(jobIDList) return result ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result["OK"]: gLogger.error("Can't check if the user has proxy uploaded:", result["Message"]) return True # Check if an upload is required return result["Value"] == False ########################################################################### types_invalidateJob = [IntType] def invalidateJob(self, jobID): """ Make job with jobID invalid, e.g. because of the sandbox submission errors. """ pass ########################################################################### def __get_job_list(self, jobInput): """ Evaluate the jobInput into a list of ints """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, basestring): try: ijob = int(jobInput) return [ijob] except: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESCHEDULE ) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result["OK"]: return result gJobLoggingDB.addLoggingRecord( result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager" ) if invalidJobList or nonauthJobList: result = S_ERROR("Some jobs failed reschedule") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList return result result = S_OK(validJobList) result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result def __deleteJob(self, jobID): """ Delete one job """ result = gJobDB.setJobStatus(jobID, "Deleted", "Checking accounting") if not result["OK"]: return result result = gtaskQueueDB.deleteJob(jobID) if not result["OK"]: gLogger.warn("Failed to delete job from the TaskQueue") return S_OK() def __killJob(self, jobID, sendKillCommand=True): """ Kill one job """ if sendKillCommand: result = gJobDB.setJobCommand(jobID, "Kill") if not result["OK"]: return result gLogger.info("Job %d is marked for termination" % jobID) result = gJobDB.setJobStatus(jobID, "Killed", "Marked for termination") if not result["OK"]: gLogger.warn("Failed to set job Killed status") result = gtaskQueueDB.deleteJob(jobID) if not result["OK"]: gLogger.warn("Failed to delete job from the TaskQueue") return S_OK() def __kill_delete_jobs(self, jobIDList, right): """ Kill or delete jobs as necessary """ jobList = self.__get_job_list(jobIDList) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right) # Get job status to see what is to be killed or deleted result = gJobDB.getAttributesForJobList(validJobList, ["Status"]) if not result["OK"]: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result["Value"].items(): if sDict["Status"] in ["Running", "Matched", "Stalled"]: killJobList.append(jobID) elif sDict["Status"] in ["Done", "Failed"]: if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict["Status"] in ["Staging"]: stagingJobList.append(jobID) bad_ids = [] for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result["OK"]: bad_ids.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result["OK"]: bad_ids.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result["OK"]: bad_ids.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() gLogger.info("Going to send killing signal to stager as well!") result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result["OK"]: gLogger.warn("Failed to kill some Stager tasks: %s" % result["Message"]) if nonauthJobList or bad_ids: result = S_ERROR("Some jobs failed deletion") if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList if bad_ids: result["FailedJobIDs"] = bad_ids return result result = S_OK(validJobList) result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result["InvalidJobIDs"] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESET ) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, "RescheduleCounter", -1) if not result["OK"]: bad_ids.append(jobID) else: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result["OK"]: bad_ids.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord( result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager" ) self.__sendJobsToOptimizationMind(good_ids) if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR("Some jobs failed resetting") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList if bad_ids: result["FailedJobIDs"] = bad_ids return result result = S_OK() result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler(RequestHandler): """ RequestHandler implementation of the JobManager """ @classmethod def initializeHandler(cls, serviceInfoDict): cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") cls.__connectToOptMind() gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind) return S_OK() @classmethod def __connectToOptMind(cls): if not cls.msgClient.connected: result = cls.msgClient.connect(JobManager=True) if not result['OK']: cls.log.warn("Cannot connect to OptimizationMind!", result['Message']) def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict['properties'] self.owner = credDict['username'] self.peerUsesLimitedProxy = credDict['isLimitedProxy'] self.diracSetup = self.serviceInfoDict['clientSetup'] self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.jobDB = gJobDB return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: return result = self.msgClient.createMessage("OptimizeJobs") if not result['OK']: self.log.error("Cannot create Optimize message: %s" % result['Message']) return msgObj = result['Value'] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result['OK']: self.log.error("Cannot send Optimize message: %s" % result['Message']) return self.log.info("Optimize msg sent for %s jobs" % len(jids)) ########################################################################### types_getMaxParametricJobs = [] def export_getMaxParametricJobs(self): """ Get the maximum number of parametric jobs """ return S_OK(self.maxParametricJobs) types_submitJob = [basestring] def export_submitJob(self, jobDesc): """ Submit a job to DIRAC WMS. The job can be a single job, or a parametric job. If it is a parametric job, then the parameters will need to be unpacked. :param str jobDesc: job description JDL (of a single or parametric job) :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK. """ if self.peerUsesLimitedProxy: return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR(EWMSSUBM, 'Failed to get job policies') policyDict = result['Value'] if not policyDict[RIGHT_SUBMIT]: return S_ERROR(EWMSSUBM, 'Job submission not authorized') # jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) result = getParameterVectorLength(jobClassAd) if not result['OK']: gLogger.error("Issue with getParameterVectorLength:", result['Message']) return result nJobs = result['Value'] parametricJob = False if nJobs > 0: # if we are here, then jobDesc was the description of a parametric job. So we start unpacking parametricJob = True if nJobs > self.maxParametricJobs: gLogger.error("Maximum of parametric jobs exceeded:", "limit %d smaller than number of jobs %d" % (self.maxParametricJobs, nJobs)) return S_ERROR(EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs) result = generateParametricJobs(jobClassAd) if not result['OK']: return result jobDescList = result['Value'] else: # if we are here, then jobDesc was the description of a single job. jobDescList = [jobDesc] jobIDList = [] if parametricJob: initialStatus = 'Submitting' initialMinorStatus = 'Bulk transaction confirmation' else: initialStatus = 'Received' initialMinorStatus = 'Job accepted' for jobDescription in jobDescList: # jobDescList because there might be a list generated by a parametric job result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup, initialStatus=initialStatus, initialMinorStatus=initialMinorStatus) if not result['OK']: return result jobID = result['JobID'] gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager') jobIDList.append(jobID) # Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if 'Value' not in retVal or not retVal['Value']: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result['JobID'] = result['Value'] result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired() # Ensure non-parametric jobs (i.e. non-bulk) get sent to optimizer immediately if not parametricJob: self.__sendJobsToOptimizationMind(jobIDList) return result ########################################################################### types_confirmBulkSubmission = [list] def export_confirmBulkSubmission(self, jobIDs): """ Confirm the possibility to proceed with processing of the jobs specified by the jobIDList :param jobIDList: list of job IDs :return: confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: gLogger.error("Issue with __getJobList", ": invalid job specification %s" % str(jobIDs)) return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs)) validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_SUBMIT) # Check that all the requested jobs are eligible if set(jobList) != set(validJobList): return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus']) if not result['OK']: return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') jobStatusDict = result['Value'] # Check if the jobs are already activated jobEnabledList = [jobID for jobID in jobList if jobStatusDict[jobID]['Status'] in ["Received", "Checking", "Waiting", "Matched", "Running"]] if set(jobEnabledList) == set(jobList): return S_OK(jobList) # Check that requested job are in Submitting status jobUpdateStatusList = list(jobID for jobID in jobList if jobStatusDict[jobID]['Status'] == "Submitting") if set(jobUpdateStatusList) != set(jobList): return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid') # Update status of all the requested jobs in one transaction result = gJobDB.setJobAttributes(jobUpdateStatusList, ['Status', 'MinorStatus'], ['Received', 'Job accepted']) if not result['OK']: return result self.__sendJobsToOptimizationMind(jobUpdateStatusList) return S_OK(jobUpdateStatusList) ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result['OK']: gLogger.error("Can't check if the user has proxy uploaded:", result['Message']) return True # Check if an upload is required return not result['Value'] ########################################################################### @staticmethod def __getJobList(jobInput): """ Evaluate the jobInput into a list of ints :param jobInput: one or more job IDs in int or str form :type jobInput: str or int or list :return : a list of int job IDs """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, basestring): try: ijob = int(jobInput) return [ijob] except BaseException: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except BaseException: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository :param jobIDList: list of job IDs :return: confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result['OK']: return result gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed reschedule') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK(validJobList) result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result @staticmethod def __deleteJob(jobID): """ Delete one job """ result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting') if not result['OK']: return result result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') # if it was the last job for the pilot, clear PilotsLogging about it result = gPilotAgentsDB.getPilotsForJobID(jobID) if not result['OK']: gLogger.error("Failed to get Pilots for JobID", result['Message']) return result for pilot in result['Value']: res = gPilotAgentsDB.getJobsForPilot(pilot) if not res['OK']: gLogger.error("Failed to get jobs for pilot", res['Message']) return res if not res['Value']: # if list of jobs for pilot is empty, delete pilot and pilotslogging result = gPilotAgentsDB.getPilotInfo(pilotID=pilot) if not result['OK']: gLogger.error("Failed to get pilot info", result['Message']) return result pilotRef = result[0]['PilotJobReference'] ret = gPilotAgentsDB.deletePilot(pilot) if not ret['OK']: gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message']) return ret if enablePilotsLogging: ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef) if not ret['OK']: gLogger.error("Failed to delete pilot logging from PilotAgentsDB", ret['Message']) return ret return S_OK() @staticmethod def __killJob(jobID, sendKillCommand=True): """ Kill one job """ if sendKillCommand: result = gJobDB.setJobCommand(jobID, 'Kill') if not result['OK']: return result gLogger.info('Job %d is marked for termination' % jobID) result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination') if not result['OK']: gLogger.warn('Failed to set job Killed status', result['Message']) result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue', result['Message']) return S_OK() def __kill_delete_jobs(self, jobIDList, right): """ Kill or delete jobs as necessary """ jobList = self.__getJobList(jobIDList) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right) # Get job status to see what is to be killed or deleted result = gJobDB.getAttributesForJobList(validJobList, ['Status']) if not result['OK']: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result['Value'].items(): if sDict['Status'] in ['Running', 'Matched', 'Stalled']: killJobList.append(jobID) elif sDict['Status'] in ['Done', 'Failed', 'Killed']: if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict['Status'] in ['Staging']: stagingJobList.append(jobID) badIDs = [] for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result['OK']: badIDs.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result['OK']: badIDs.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result['OK']: badIDs.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() gLogger.info('Going to send killing signal to stager as well!') result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result['OK']: gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message']) if nonauthJobList or badIDs: result = S_ERROR('Some jobs failed deletion') if nonauthJobList: gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList)) result['NonauthorizedJobIDs'] = nonauthJobList if badIDs: gLogger.warn("JobIDs failed to be deleted", str(badIDs)) result['FailedJobIDs'] = badIDs return result result = S_OK(validJobList) result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result['InvalidJobIDs'] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list :param jobIDList: list of job IDs :return: S_OK/S_ERROR """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET) badIDs = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1) if not result['OK']: badIDs.append(jobID) else: gtaskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result['OK']: badIDs.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') self.__sendJobsToOptimizationMind(good_ids) if invalidJobList or nonauthJobList or badIDs: result = S_ERROR('Some jobs failed resetting') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if badIDs: result['FailedJobIDs'] = badIDs return result result = S_OK() result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandlerMixin: """RequestHandler implementation of the JobManager""" @classmethod def initializeHandler(cls, serviceInfoDict): """Initialization of DB objects and OptimizationMind""" try: result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.JobDB", "JobDB") if not result["OK"]: return result cls.jobDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.JobLoggingDB", "JobLoggingDB") if not result["OK"]: return result cls.jobLoggingDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.TaskQueueDB", "TaskQueueDB") if not result["OK"]: return result cls.taskQueueDB = result["Value"](parentLogger=cls.log) result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.PilotAgentsDB", "PilotAgentsDB") if not result["OK"]: return result cls.pilotAgentsDB = result["Value"](parentLogger=cls.log) except RuntimeError as excp: return S_ERROR("Can't connect to DB: %s" % excp) cls.pilotsLoggingDB = None enablePilotsLogging = Operations().getValue( "/Services/JobMonitoring/usePilotsLoggingFlag", False) if enablePilotsLogging: try: result = ObjectLoader().loadObject( "WorkloadManagementSystem.DB.PilotsLoggingDB", "PilotsLoggingDB") if not result["OK"]: return result cls.pilotsLoggingDB = result["Value"](parentLogger=cls.log) except RuntimeError as excp: return S_ERROR("Can't connect to DB: %s" % excp) cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind") result = cls.msgClient.connect(JobManager=True) if not result["OK"]: cls.log.warn("Cannot connect to OptimizationMind!", result["Message"]) return S_OK() def initializeRequest(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict["DN"] self.ownerGroup = credDict["group"] self.userProperties = credDict["properties"] self.owner = credDict["username"] self.peerUsesLimitedProxy = credDict["isLimitedProxy"] self.maxParametricJobs = self.srv_getCSOption("MaxParametricJobs", MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) self.jobPolicy.jobDB = self.jobDB return S_OK() def __sendJobsToOptimizationMind(self, jids): if not self.msgClient.connected: result = self.msgClient.connect(JobManager=True) if not result["OK"]: self.log.warn("Cannot connect to OptimizationMind!", result["Message"]) return result = self.msgClient.createMessage("OptimizeJobs") if not result["OK"]: self.log.error("Cannot create Optimize message", result["Message"]) return msgObj = result["Value"] msgObj.jids = list(sorted(jids)) result = self.msgClient.sendMessage(msgObj) if not result["OK"]: self.log.error("Cannot send Optimize message", result["Message"]) return self.log.info("Optimize msg sent", "for %s jobs" % len(jids)) ########################################################################### types_getMaxParametricJobs = [] def export_getMaxParametricJobs(self): """Get the maximum number of parametric jobs :return: S_OK()/S_ERROR() """ return S_OK(self.maxParametricJobs) types_submitJob = [str] def export_submitJob(self, jobDesc): """Submit a job to DIRAC WMS. The job can be a single job, or a parametric job. If it is a parametric job, then the parameters will need to be unpacked. :param str jobDesc: job description JDL (of a single or parametric job) :return: S_OK/S_ERROR, a list of newly created job IDs in case of S_OK. """ if self.peerUsesLimitedProxy: return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result["OK"]: return S_ERROR(EWMSSUBM, "Failed to get job policies") policyDict = result["Value"] if not policyDict[RIGHT_SUBMIT]: return S_ERROR(EWMSSUBM, "Job submission not authorized") # jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parametric one jobClassAd = ClassAd(jobDesc) result = getParameterVectorLength(jobClassAd) if not result["OK"]: self.log.error("Issue with getParameterVectorLength", result["Message"]) return result nJobs = result["Value"] parametricJob = False if nJobs is not None and nJobs > 0: # if we are here, then jobDesc was the description of a parametric job. So we start unpacking parametricJob = True if nJobs > self.maxParametricJobs: self.log.error( "Maximum of parametric jobs exceeded:", "limit %d smaller than number of jobs %d" % (self.maxParametricJobs, nJobs), ) return S_ERROR( EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs) result = generateParametricJobs(jobClassAd) if not result["OK"]: return result jobDescList = result["Value"] else: # if we are here, then jobDesc was the description of a single job. jobDescList = [jobDesc] jobIDList = [] if parametricJob: initialStatus = JobStatus.SUBMITTING initialMinorStatus = "Bulk transaction confirmation" else: initialStatus = JobStatus.RECEIVED initialMinorStatus = "Job accepted" for jobDescription in jobDescList: # jobDescList because there might be a list generated by a parametric job result = self.jobDB.insertNewJobIntoDB( jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup, initialStatus=initialStatus, initialMinorStatus=initialMinorStatus, ) if not result["OK"]: return result jobID = result["JobID"] self.log.info('Job added to the JobDB", "%s for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) self.jobLoggingDB.addLoggingRecord(jobID, result["Status"], result["MinorStatus"], source="JobManager") jobIDList.append(jobID) # Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if "Value" not in retVal or not retVal["Value"]: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result["JobID"] = result["Value"] result["requireProxyUpload"] = self.__checkIfProxyUploadIsRequired() # Ensure non-parametric jobs (i.e. non-bulk) get sent to optimizer immediately if not parametricJob: self.__sendJobsToOptimizationMind(jobIDList) return result ########################################################################### types_confirmBulkSubmission = [list] def export_confirmBulkSubmission(self, jobIDs): """Confirm the possibility to proceed with processing of the jobs specified by the jobIDList :param list jobIDs: list of job IDs :return: S_OK(list)/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: self.log.error("Issue with __getJobList", ": invalid job specification %s" % str(jobIDs)) return S_ERROR(EWMSSUBM, "Invalid job specification: " + str(jobIDs)) validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_SUBMIT) # Check that all the requested jobs are eligible if set(jobList) != set(validJobList): return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") result = self.jobDB.getJobsAttributes(jobList, ["Status", "MinorStatus"]) if not result["OK"]: return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") js_dict = strToIntDict(result["Value"]) # Check if the jobs are already activated jobEnabledList = [ jobID for jobID in jobList if js_dict[jobID]["Status"] in [ JobStatus.RECEIVED, JobStatus.CHECKING, JobStatus.WAITING, JobStatus.MATCHED, JobStatus.RUNNING ] ] if set(jobEnabledList) == set(jobList): return S_OK(jobList) # Check that requested job are in Submitting status jobUpdateStatusList = list( jobID for jobID in jobList if js_dict[jobID]["Status"] == JobStatus.SUBMITTING) if set(jobUpdateStatusList) != set(jobList): return S_ERROR( EWMSSUBM, "Requested jobs for bulk transaction are not valid") # Update status of all the requested jobs in one transaction result = self.jobDB.setJobAttributes( jobUpdateStatusList, ["Status", "MinorStatus"], [JobStatus.RECEIVED, "Job accepted"]) if not result["OK"]: return result self.__sendJobsToOptimizationMind(jobUpdateStatusList) return S_OK(jobUpdateStatusList) ########################################################################### def __checkIfProxyUploadIsRequired(self): """Check if an upload is required :return: bool """ result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result["OK"]: self.log.error("Can't check if the user has proxy uploaded", result["Message"]) return True # Check if an upload is required return not result["Value"] ########################################################################### @staticmethod def __getJobList(jobInput): """Evaluate the jobInput into a list of ints :param jobInput: one or more job IDs in int or str form :type jobInput: str or int or list :return : a list of int job IDs """ if isinstance(jobInput, int): return [jobInput] if isinstance(jobInput, str): try: ijob = int(jobInput) return [ijob] except ValueError: return [] if isinstance(jobInput, list): try: ljob = [int(x) for x in jobInput] return ljob except ValueError: return [] return [] ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository :param list jobIDs: list of job IDs :return: S_OK()/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESCHEDULE) for jobID in validJobList: self.taskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = self.jobDB.rescheduleJob(jobID) self.log.debug(str(result)) if not result["OK"]: return result self.jobLoggingDB.addLoggingRecord( result["JobID"], status=result["Status"], minorStatus=result["MinorStatus"], applicationStatus="Unknown", source="JobManager", ) if invalidJobList or nonauthJobList: result = S_ERROR("Some jobs failed reschedule") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList return result result = S_OK(validJobList) result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() self.__sendJobsToOptimizationMind(validJobList) return result types_removeJob = [] def export_removeJob(self, jobIDs): """ Completely remove a list of jobs, also from TaskQueueDB, and including its JobLogging info. Only authorized users are allowed to remove jobs. :param list jobIDs: list of job IDs :return: S_OK()/S_ERROR() -- confirmed job IDs """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, _ = self.jobPolicy.evaluateJobRights( jobList, RIGHT_DELETE) count = 0 error_count = 0 if validJobList: self.log.verbose("Removing jobs", "(n=%d)" % len(validJobList)) result = self.jobDB.removeJobFromDB(validJobList) if not result["OK"]: self.log.error("Failed to remove jobs from JobDB", "(n=%d)" % len(validJobList)) else: self.log.info("Removed jobs from JobDB", "(n=%d)" % len(validJobList)) for jobID in validJobList: resultTQ = self.taskQueueDB.deleteJob(jobID) if not resultTQ["OK"]: self.log.warn("Failed to remove job from TaskQueueDB", "(%d): %s" % (jobID, resultTQ["Message"])) error_count += 1 else: count += 1 result = self.jobLoggingDB.deleteJob(validJobList) if not result["OK"]: self.log.error("Failed to remove jobs from JobLoggingDB", "(n=%d)" % len(validJobList)) else: self.log.info("Removed jobs from JobLoggingDB", "(n=%d)" % len(validJobList)) if count > 0 or error_count > 0: self.log.info( "Removed jobs from DB", "(%d jobs with %d errors)" % (count, error_count)) if invalidJobList or nonauthJobList: self.log.error( "Jobs can not be removed", ": %d invalid and %d in nonauthJobList" % (len(invalidJobList), len(nonauthJobList)), ) errMsg = "Some jobs failed removal" res = S_ERROR() if invalidJobList: self.log.debug("Invalid jobs: %s" % ",".join(str(ij) for ij in invalidJobList)) res["InvalidJobIDs"] = invalidJobList errMsg += ": invalid jobs" if nonauthJobList: self.log.debug("nonauthJobList jobs: %s" % ",".join(str(nj) for nj in nonauthJobList)) res["NonauthorizedJobIDs"] = nonauthJobList errMsg += ": non-authorized jobs" res["Message"] = errMsg return res return S_OK(validJobList) def __deleteJob(self, jobID): """Set the job status to "Deleted" and remove the pilot that ran and its logging info if the pilot is finished. :param int jobID: job ID :return: S_OK()/S_ERROR() """ result = self.jobDB.setJobStatus(jobID, JobStatus.DELETED, "Checking accounting") if not result["OK"]: return result result = self.taskQueueDB.deleteJob(jobID) if not result["OK"]: self.log.warn("Failed to delete job from the TaskQueue") # if it was the last job for the pilot, clear PilotsLogging about it result = self.pilotAgentsDB.getPilotsForJobID(jobID) if not result["OK"]: self.log.error("Failed to get Pilots for JobID", result["Message"]) return result for pilot in result["Value"]: res = self.pilotAgentsDB.getJobsForPilot(pilot) if not res["OK"]: self.log.error("Failed to get jobs for pilot", res["Message"]) return res if not res[ "Value"]: # if list of jobs for pilot is empty, delete pilot and pilotslogging result = self.pilotAgentsDB.getPilotInfo(pilotID=pilot) if not result["OK"]: self.log.error("Failed to get pilot info", result["Message"]) return result pilotRef = result[0]["PilotJobReference"] ret = self.pilotAgentsDB.deletePilot(pilot) if not ret["OK"]: self.log.error("Failed to delete pilot from PilotAgentsDB", ret["Message"]) return ret if self.pilotsLoggingDB: ret = self.pilotsLoggingDB.deletePilotsLogging(pilotRef) if not ret["OK"]: self.log.error( "Failed to delete pilot logging from PilotAgentsDB", ret["Message"]) return ret return S_OK() def __killJob(self, jobID, sendKillCommand=True): """Kill one job :param int jobID: job ID :param bool sendKillCommand: send kill command :return: S_OK()/S_ERROR() """ if sendKillCommand: result = self.jobDB.setJobCommand(jobID, "Kill") if not result["OK"]: return result self.log.info("Job marked for termination", jobID) result = self.jobDB.setJobStatus(jobID, JobStatus.KILLED, "Marked for termination") if not result["OK"]: self.log.warn("Failed to set job Killed status", result["Message"]) result = self.taskQueueDB.deleteJob(jobID) if not result["OK"]: self.log.warn("Failed to delete job from the TaskQueue", result["Message"]) return S_OK() def __kill_delete_jobs(self, jobIDList, right): """Kill (== set the status to "KILLED") or delete (== set the status to "DELETED") jobs as necessary :param list jobIDList: job IDs :param str right: right :return: S_OK()/S_ERROR() """ jobList = self.__getJobList(jobIDList) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDList)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, right) badIDs = [] if validJobList: # Get job status to see what is to be killed or deleted result = self.jobDB.getJobsAttributes(validJobList, ["Status"]) if not result["OK"]: return result killJobList = [] deleteJobList = [] markKilledJobList = [] stagingJobList = [] for jobID, sDict in result["Value"].items(): # can be an iterator if sDict["Status"] in (JobStatus.RUNNING, JobStatus.MATCHED, JobStatus.STALLED): killJobList.append(jobID) elif sDict["Status"] in ( JobStatus.SUBMITTING, JobStatus.RECEIVED, JobStatus.CHECKING, JobStatus.WAITING, JobStatus.RESCHEDULED, JobStatus.DONE, JobStatus.FAILED, JobStatus.KILLED, ): if not right == RIGHT_KILL: deleteJobList.append(jobID) else: markKilledJobList.append(jobID) if sDict["Status"] in [JobStatus.STAGING]: stagingJobList.append(jobID) for jobID in markKilledJobList: result = self.__killJob(jobID, sendKillCommand=False) if not result["OK"]: badIDs.append(jobID) for jobID in killJobList: result = self.__killJob(jobID) if not result["OK"]: badIDs.append(jobID) for jobID in deleteJobList: result = self.__deleteJob(jobID) if not result["OK"]: badIDs.append(jobID) if stagingJobList: stagerClient = StorageManagerClient() self.log.info( "Going to send killing signal to stager as well!") result = stagerClient.killTasksBySourceTaskID(stagingJobList) if not result["OK"]: self.log.warn("Failed to kill some Stager tasks", result["Message"]) if nonauthJobList or badIDs: result = S_ERROR("Some jobs failed deletion") if nonauthJobList: self.log.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList)) result["NonauthorizedJobIDs"] = nonauthJobList if badIDs: self.log.warn("JobIDs failed to be deleted", str(badIDs)) result["FailedJobIDs"] = badIDs return result result = S_OK(validJobList) result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() if invalidJobList: result["InvalidJobIDs"] = invalidJobList return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """Delete jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE) ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """Kill jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ return self.__kill_delete_jobs(jobIDs, RIGHT_KILL) ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """Reset jobs specified in the jobIDs list :param list jobIDs: list of job IDs :return: S_OK/S_ERROR """ jobList = self.__getJobList(jobIDs) if not jobList: return S_ERROR("Invalid job specification: " + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights( jobList, RIGHT_RESET) badIDs = [] good_ids = [] for jobID in validJobList: result = self.jobDB.setJobAttribute(jobID, "RescheduleCounter", -1) if not result["OK"]: badIDs.append(jobID) else: self.taskQueueDB.deleteJob(jobID) # gJobDB.deleteJobFromQueue(jobID) result = self.jobDB.rescheduleJob(jobID) if not result["OK"]: badIDs.append(jobID) else: good_ids.append(jobID) self.jobLoggingDB.addLoggingRecord( result["JobID"], status=result["Status"], minorStatus=result["MinorStatus"], applicationStatus="Unknown", source="JobManager", ) self.__sendJobsToOptimizationMind(good_ids) if invalidJobList or nonauthJobList or badIDs: result = S_ERROR("Some jobs failed resetting") if invalidJobList: result["InvalidJobIDs"] = invalidJobList if nonauthJobList: result["NonauthorizedJobIDs"] = nonauthJobList if badIDs: result["FailedJobIDs"] = badIDs return result result = S_OK() result["requireProxyUpload"] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result
class JobManagerHandler(RequestHandler): def initialize(self): credDict = self.getRemoteCredentials() self.ownerDN = credDict['DN'] self.ownerGroup = credDict['group'] self.userProperties = credDict['properties'] self.owner = credDict['username'] self.peerUsesLimitedProxy = credDict['isLimitedProxy'] self.diracSetup = self.serviceInfoDict['clientSetup'] serviceSectionPath = self.serviceInfoDict['serviceSectionPath'] self.maxParametricJobs = gConfig.getValue( '%s/MaxParametricJobs' % serviceSectionPath, MAX_PARAMETRIC_JOBS) self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties) ########################################################################### types_submitJob = [StringType] def export_submitJob(self, jobDesc): """ Submit a single job to DIRAC WMS """ if self.peerUsesLimitedProxy: return S_ERROR("Can't submit using a limited proxy! (bad boy!)") # Check job submission permission result = self.jobPolicy.getJobPolicy() if not result['OK']: return S_ERROR('Failed to get job policies') policyDict = result['Value'] if not policyDict[RIGHT_SUBMIT]: return S_ERROR('Job submission not authorized') #jobDesc is JDL for now jobDesc = jobDesc.strip() if jobDesc[0] != "[": jobDesc = "[%s" % jobDesc if jobDesc[-1] != "]": jobDesc = "%s]" % jobDesc # Check if the job is a parameteric one jobClassAd = ClassAd(jobDesc) parametricJob = False if jobClassAd.lookupAttribute('Parameters'): parametricJob = True if jobClassAd.isAttributeList('Parameters'): parameterList = jobClassAd.getListFromExpression('Parameters') else: pStep = 0 pFactor = 1 nParameters = jobClassAd.getAttributeInt('Parameters') if not nParameters: value = jobClassAd.get_expression('Parameters') return S_ERROR( 'Illegal value for Parameters JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterStart'): value = jobClassAd.get_expression( 'ParameterStart').replace('"', '') try: pStart = int(value) except: try: pStart = float(value) except: return S_ERROR( 'Illegal value for ParameterStart JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterStep'): pStep = jobClassAd.getAttributeInt('ParameterStep') if not pStep: pStep = jobClassAd.getAttributeFloat('ParameterStep') if not pStep: value = jobClassAd.get_expression('ParameterStep') return S_ERROR( 'Illegal value for ParameterStep JDL field: %s' % value) if jobClassAd.lookupAttribute('ParameterFactor'): pFactor = jobClassAd.getAttributeInt('ParameterFactor') if not pFactor: pFactor = jobClassAd.getAttributeFloat( 'ParameterFactor') if not pFactor: value = jobClassAd.get_expression( 'ParameterFactor') return S_ERROR( 'Illegal value for ParameterFactor JDL field: %s' % value) parameterList = list() parameterList.append(pStart) for i in range(nParameters - 1): parameterList.append(parameterList[i] * pFactor + pStep) if len(parameterList) > self.maxParametricJobs: return S_ERROR( 'The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs) jobDescList = [] for n, p in enumerate(parameterList): jobDescList.append( jobDesc.replace('%s', str(p)).replace('%n', str(n))) else: jobDescList = [jobDesc] jobIDList = [] for jobDescription in jobDescList: result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup) if not result['OK']: return result jobID = result['JobID'] gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup)) gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager') jobIDList.append(jobID) #Set persistency flag retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup) if 'Value' not in retVal or not retVal['Value']: gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True) if parametricJob: result = S_OK(jobIDList) else: result = S_OK(jobIDList[0]) result['JobID'] = result['Value'] result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired() return result ########################################################################### def __checkIfProxyUploadIsRequired(self): result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000) if not result['OK']: gLogger.error("Can't check if the user has proxy uploaded:", result['Message']) return True #Check if an upload is required return result['Value'] == False ########################################################################### types_invalidateJob = [IntType] def invalidateJob(self, jobID): """ Make job with jobID invalid, e.g. because of the sandbox submission errors. """ pass ########################################################################### def __get_job_list(self, jobInput): """ Evaluate the jobInput into a list of ints """ if type(jobInput) == IntType: return [jobInput] if type(jobInput) == StringType: try: ijob = int(jobInput) return [ijob] except: return [] if type(jobInput) == ListType: try: ljob = [int(x) for x in jobInput] return ljob except: return [] return [] ########################################################################### def __evaluate_rights(self, jobList, right): """ Get access rights to jobID for the user ownerDN/ownerGroup """ self.jobPolicy.setJobDB(gJobDB) validJobList = [] invalidJobList = [] nonauthJobList = [] ownerJobList = [] for jobID in jobList: result = self.jobPolicy.getUserRightsForJob(jobID) if result['OK']: if result['Value'][right]: validJobList.append(jobID) else: nonauthJobList.append(jobID) if result['UserIsOwner']: ownerJobList.append(jobID) else: invalidJobList.append(jobID) return validJobList, invalidJobList, nonauthJobList, ownerJobList ########################################################################### types_rescheduleJob = [] def export_rescheduleJob(self, jobIDs): """ Reschedule a single job. If the optional proxy parameter is given it will be used to refresh the proxy in the Proxy Repository """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_RESCHEDULE) for jobID in validJobList: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) gLogger.debug(str(result)) if not result['OK']: return result gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed deletion') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_deleteJob = [] def export_deleteJob(self, jobIDs): """ Delete jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_DELETE) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting') if not result['OK']: bad_ids.append(jobID) else: good_ids.append(jobID) #result = gJobDB.deleteJobFromQueue(jobID) #if not result['OK']: # gLogger.warn('Failed to delete job from the TaskQueue (old)') result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') if invalidJobList or nonauthJobList: result = S_ERROR('Some jobs failed deletion') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_killJob = [] def export_killJob(self, jobIDs): """ Kill jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_KILL) bad_ids = [] good_ids = [] for jobID in validJobList: # kill jobID result = gJobDB.setJobCommand(jobID, 'Kill') if not result['OK']: bad_ids.append(jobID) else: gLogger.info('Job %d is marked for termination' % jobID) good_ids.append(jobID) result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination') if not result['OK']: gLogger.warn('Failed to set job status') #result = gJobDB.deleteJobFromQueue(jobID) #if not result['OK']: # gLogger.warn('Failed to delete job from the TaskQueue (old)') result = gtaskQueueDB.deleteJob(jobID) if not result['OK']: gLogger.warn('Failed to delete job from the TaskQueue') if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed deletion') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK(validJobList) result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result ########################################################################### types_resetJob = [] def export_resetJob(self, jobIDs): """ Reset jobs specified in the jobIDs list """ jobList = self.__get_job_list(jobIDs) if not jobList: return S_ERROR('Invalid job specification: ' + str(jobIDs)) validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights( jobList, RIGHT_RESET) bad_ids = [] good_ids = [] for jobID in validJobList: result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', 0) if not result['OK']: bad_ids.append(jobID) else: gtaskQueueDB.deleteJob(jobID) #gJobDB.deleteJobFromQueue(jobID) result = gJobDB.rescheduleJob(jobID) if not result['OK']: bad_ids.append(jobID) else: good_ids.append(jobID) gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'], application='Unknown', source='JobManager') if invalidJobList or nonauthJobList or bad_ids: result = S_ERROR('Some jobs failed resetting') if invalidJobList: result['InvalidJobIDs'] = invalidJobList if nonauthJobList: result['NonauthorizedJobIDs'] = nonauthJobList if bad_ids: result['FailedJobIDs'] = bad_ids return result result = S_OK() result['requireProxyUpload'] = len( ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired() return result