class JobMonitoringHandler(RequestHandler):
    """Service handler exposing read-only job monitoring queries.

    Each ``export_<name>`` method is accompanied by a ``types_<name>`` list
    giving the expected types of its leading positional arguments
    (presumably consumed by the RequestHandler framework to validate
    incoming calls -- confirm against the framework documentation).

    Unless stated otherwise the methods return the result structure produced
    by the underlying database call unchanged.
    """

    def initialize(self):
        """Set up per-request state from the credentials of the caller.

        Stores the remote DN and group, reads the ``GlobalJobsInfo`` option
        for that group and builds the JobPolicy object used by
        ``export_getJobPageSummaryWeb`` for access checks.
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        operations = Operations(group=self.ownerGroup)
        self.globalJobsInfo = operations.getValue('/Services/JobMonitoring/GlobalJobsInfo', True)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.globalJobsInfo)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    types_getApplicationStates = []

    @staticmethod
    def export_getApplicationStates():
        """Return the distinct values of the ApplicationStatus job attribute."""
        return gJobDB.getDistinctJobAttributes('ApplicationStatus')

    types_getJobTypes = []

    @staticmethod
    def export_getJobTypes():
        """Return the distinct values of the JobType job attribute."""
        return gJobDB.getDistinctJobAttributes('JobType')

    types_getOwners = []

    @staticmethod
    def export_getOwners():
        """Return the distinct values of the Owner job attribute."""
        return gJobDB.getDistinctJobAttributes('Owner')

    types_getProductionIds = []

    @staticmethod
    def export_getProductionIds():
        """Return the distinct production IDs, stored as the JobGroup attribute."""
        return gJobDB.getDistinctJobAttributes('JobGroup')

    types_getJobGroups = []

    @staticmethod
    def export_getJobGroups(condDict=None, cutDate=None):
        """Return the distinct values of the JobGroup job attribute.

        :param condDict: optional selection conditions passed to the database
        :param cutDate: optional value passed as the ``newer`` cut
        """
        return gJobDB.getDistinctJobAttributes('JobGroup', condDict, newer=cutDate)

    types_getSites = []

    @staticmethod
    def export_getSites():
        """Return the distinct values of the Site job attribute."""
        return gJobDB.getDistinctJobAttributes('Site')

    types_getStates = []

    @staticmethod
    def export_getStates():
        """Return the distinct values of the Status job attribute."""
        return gJobDB.getDistinctJobAttributes('Status')

    types_getMinorStates = []

    @staticmethod
    def export_getMinorStates():
        """Return the distinct values of the MinorStatus job attribute."""
        return gJobDB.getDistinctJobAttributes('MinorStatus')

    types_getJobs = []

    @staticmethod
    def export_getJobs(attrDict=None, cutDate=None):
        """Return the list of job IDs matching the conditions in attrDict.

        :param attrDict: selection conditions, passed to the database as given
        :param cutDate: optional value passed as the ``newer`` cut

        NOTE: the conditions are not filtered against a list of allowed
        query attributes; that validation was disabled in the original code.
        """
        return gJobDB.selectJobs(attrDict, newer=cutDate)

    types_getCounters = [list]

    @staticmethod
    def export_getCounters(attrList, attrDict=None, cutDate=''):
        """Count jobs per distinct combination of the attributes in attrList.

        attrDict is used as the selection condition. The result value is a
        list whose items hold the distinct attribute values and the
        corresponding counter.

        NOTE: neither attrList nor attrDict is checked against a list of
        allowed query attributes; that validation was disabled in the
        original code.
        """
        cutDate = str(cutDate)
        if not attrDict:
            attrDict = {}

        return gJobDB.getCounters('Jobs', attrList, attrDict,
                                  newer=cutDate,
                                  timeStamp='LastUpdateTime')

    types_getCurrentJobCounters = []

    @staticmethod
    def export_getCurrentJobCounters(attrDict=None):
        """Return job counters per Status for the attrDict selection.

        Statuses listed in FINAL_STATES are counted over the last day only;
        all other statuses are counted without a time cut.
        """
        if not attrDict:
            attrDict = {}

        result = gJobDB.getCounters('Jobs', ['Status'], attrDict, timeStamp='LastUpdateTime')
        if not result['OK']:
            return result

        last_update = Time.dateTime() - Time.day
        resultDay = gJobDB.getCounters('Jobs', ['Status'], attrDict,
                                       newer=last_update,
                                       timeStamp='LastUpdateTime')
        if not resultDay['OK']:
            return resultDay

        resultDict = {}
        for statusDict, count in result['Value']:
            status = statusDict['Status']
            resultDict[status] = count
            if status in FINAL_STATES:
                # Final states report the last-day count, or 0 when no job
                # reached that state during the last day
                resultDict[status] = 0
                for statusDayDict, ccount in resultDay['Value']:
                    if status == statusDayDict['Status']:
                        resultDict[status] = ccount
                        break

        return S_OK(resultDict)

    types_getJobStatus = [int]

    @staticmethod
    def export_getJobStatus(jobID):
        """Return the Status attribute of the given job."""
        return gJobDB.getJobAttribute(jobID, 'Status')

    types_getJobOwner = [int]

    @staticmethod
    def export_getJobOwner(jobID):
        """Return the Owner attribute of the given job."""
        return gJobDB.getJobAttribute(jobID, 'Owner')

    types_getJobSite = [int]

    @staticmethod
    def export_getJobSite(jobID):
        """Return the Site attribute of the given job."""
        return gJobDB.getJobAttribute(jobID, 'Site')

    types_getJobJDL = [int, bool]

    @staticmethod
    def export_getJobJDL(jobID, original):
        """Return the JDL of the given job.

        :param original: forwarded as the ``original`` flag of JobDB.getJobJDL
        """
        return gJobDB.getJobJDL(jobID, original=original)

    types_getJobLoggingInfo = [int]

    @staticmethod
    def export_getJobLoggingInfo(jobID):
        """Return the logging information stored for the given job."""
        return gJobLoggingDB.getJobLoggingInfo(jobID)

    types_getJobsParameters = [list, list]

    @staticmethod
    def export_getJobsParameters(jobIDs, parameters):
        """Return the requested attributes for a list of jobs.

        An empty dictionary is returned when either list is empty.
        """
        if not (jobIDs and parameters):
            return S_OK({})
        return gJobDB.getAttributesForJobList(jobIDs, parameters)

    types_getJobsStatus = [list]

    @staticmethod
    def export_getJobsStatus(jobIDs):
        """Return the Status of each job in jobIDs ({} for an empty list)."""
        if not jobIDs:
            return S_OK({})
        return gJobDB.getAttributesForJobList(jobIDs, ['Status'])

    types_getJobsMinorStatus = [list]

    @staticmethod
    def export_getJobsMinorStatus(jobIDs):
        """Return the MinorStatus of each job in jobIDs."""
        return gJobDB.getAttributesForJobList(jobIDs, ['MinorStatus'])

    types_getJobsApplicationStatus = [list]

    @staticmethod
    def export_getJobsApplicationStatus(jobIDs):
        """Return the ApplicationStatus of each job in jobIDs."""
        return gJobDB.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

    types_getJobsSites = [list]

    @staticmethod
    def export_getJobsSites(jobIDs):
        """Return the Site of each job in jobIDs."""
        return gJobDB.getAttributesForJobList(jobIDs, ['Site'])

    types_getJobSummary = [int]

    @staticmethod
    def export_getJobSummary(jobID):
        """Return the SUMMARY set of attributes for the given job."""
        return gJobDB.getJobAttributes(jobID, SUMMARY)

    types_getJobPrimarySummary = [int]

    @staticmethod
    def export_getJobPrimarySummary(jobID):
        """Return the PRIMARY_SUMMARY set of attributes for the given job."""
        return gJobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

    types_getJobsSummary = [list]

    @staticmethod
    def export_getJobsSummary(jobIDs):
        """Return the SUMMARY attributes of the given jobs as a string.

        The value is the ``str()`` representation of the dictionary returned
        by the database. An error is returned for an empty job list or when
        the database query fails.
        """
        if not jobIDs:
            return S_ERROR('JobMonitoring.getJobsSummary: Received empty job list')

        result = gJobDB.getAttributesForJobList(jobIDs, SUMMARY)
        if not result['OK']:
            # Propagate the database error instead of failing on a missing 'Value'
            return result
        return S_OK(str(result['Value']))

    types_getJobPageSummaryWeb = [dict, list, int, int]

    def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem, maxItems, selectJobs=True):
        """Return the job summary for one page of the job monitor.

        :param selectDict: selection conditions; the optional 'FromDate'
                           (or legacy 'LastUpdate') and 'ToDate' keys are
                           removed from it and used as time cuts
        :param sortList: list of (attribute, order) pairs; only the first
                         one is used
        :param startItem: index of the first job of the page
        :param maxItems: maximum number of jobs in the page
        :param selectJobs: when False only the counters are returned

        The result dictionary contains 'TotalRecords', 'Extras' (counters
        per Status) and, when jobs are selected, 'ParameterNames' and
        'Records'.
        """
        resultDict = {}

        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        # Restrict the selection to the owners the caller may inspect
        result = self.jobPolicy.getControlledUsers(RIGHT_GET_INFO)
        if not result['OK']:
            return S_ERROR('Failed to evaluate user rights')
        if result['Value'] != 'ALL':
            selectDict[('Owner', 'OwnerGroup')] = result['Value']

        # Sorting instructions. Only one for the moment.
        if sortList:
            orderAttribute = sortList[0][0] + ":" + sortList[0][1]
        else:
            orderAttribute = None

        statusDict = {}
        result = gJobDB.getCounters('Jobs', ['Status'], selectDict,
                                    newer=startDate,
                                    older=endDate,
                                    timeStamp='LastUpdateTime')
        nJobs = 0
        if result['OK']:
            for stDict, count in result['Value']:
                nJobs += count
                statusDict[stDict['Status']] = count

        resultDict['TotalRecords'] = nJobs
        if nJobs == 0:
            return S_OK(resultDict)

        resultDict['Extras'] = statusDict

        if selectJobs:
            iniJob = startItem
            if iniJob >= nJobs:
                return S_ERROR('Item number out of range')

            result = gJobDB.selectJobs(selectDict, orderAttribute=orderAttribute,
                                       newer=startDate, older=endDate,
                                       limit=(maxItems, iniJob))
            if not result['OK']:
                return S_ERROR('Failed to select jobs: ' + result['Message'])

            summaryJobList = result['Value']
            if not self.globalJobsInfo:
                validJobs, _invalidJobs, _nonauthJobs, _ownJobs = self.jobPolicy.evaluateJobRights(summaryJobList,
                                                                                                   RIGHT_GET_INFO)
                summaryJobList = validJobs

            result = gJobDB.getAttributesForJobList(summaryJobList, SUMMARY)
            if not result['OK']:
                return S_ERROR('Failed to get job summary: ' + result['Message'])

            summaryDict = result['Value']

            # Evaluate last sign of life time
            for jobDict in summaryDict.values():
                if jobDict['HeartBeatTime'] == 'None':
                    jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
                else:
                    lastTime = Time.fromString(jobDict['LastUpdateTime'])
                    hbTime = Time.fromString(jobDict['HeartBeatTime'])
                    # (lastTime - lastTime) is used as a zero time difference.
                    # The heart beat is preferred when it is more recent than
                    # the last update, and for stalled jobs (including Failed
                    # jobs whose minor status shows they were stalled).
                    if ((hbTime - lastTime) > (lastTime - lastTime) or
                            jobDict['Status'] == "Stalled" or
                            jobDict['MinorStatus'].startswith('Job stalled') or
                            jobDict['MinorStatus'].startswith('Stalling')):
                        jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
                    else:
                        jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']

            tqDict = {}
            result = gTaskQueueDB.getTaskQueueForJobs(summaryJobList)
            if result['OK']:
                tqDict = result['Value']

            # If no jobs can be selected after the properties check
            if not summaryDict:
                return S_OK(resultDict)

            # Prepare the standard structure now: the column names are taken
            # from the first job and applied to every record
            key = list(summaryDict)[0]
            paramNames = list(summaryDict[key])

            records = []
            for jobID, jobDict in summaryDict.items():
                jParList = [jobDict[pname] for pname in paramNames]
                jParList.append(tqDict.get(jobID, 0))
                records.append(jParList)

            resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
            resultDict['Records'] = records

        return S_OK(resultDict)

    types_getJobStats = [basestring, dict]

    @staticmethod
    def export_getJobStats(attribute, selectDict):
        """Return the number of jobs per value of the given attribute.

        :param attribute: job attribute to build the distribution for
        :param selectDict: selection conditions; the optional 'FromDate'
                           (or legacy 'LastUpdate') and 'ToDate' keys are
                           removed from it and used as time cuts

        An empty dictionary is returned when the counters query fails.
        """
        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        result = gJobDB.getCounters('Jobs', [attribute], selectDict,
                                    newer=startDate,
                                    older=endDate,
                                    timeStamp='LastUpdateTime')
        resultDict = {}
        if result['OK']:
            for cDict, count in result['Value']:
                resultDict[cDict[attribute]] = count

        return S_OK(resultDict)

    types_getJobsPrimarySummary = [list]

    @staticmethod
    def export_getJobsPrimarySummary(jobIDs):
        """Return the PRIMARY_SUMMARY attributes of each job in jobIDs."""
        return gJobDB.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

    types_getJobParameter = [[basestring, int, long], basestring]

    @staticmethod
    def export_getJobParameter(jobID, parName):
        """Return the single job parameter parName of the given job."""
        return gJobDB.getJobParameters(jobID, [parName])

    types_getJobParameters = [[int, long]]

    @staticmethod
    def export_getJobParameters(jobID):
        """Return the parameters stored for the given job."""
        return gJobDB.getJobParameters(jobID)

    types_traceJobParameter = [basestring, [basestring, int, long, list],
                               basestring, [basestring, None],
                               [basestring, None]]

    @staticmethod
    def export_traceJobParameter(site, localID, parameter, date, until):
        """Delegate to JobDB.traceJobParameter with the arguments as given.

        NOTE(review): ``date`` and ``until`` presumably bound the time window
        of the search -- confirm against JobDB.
        """
        return gJobDB.traceJobParameter(site, localID, parameter, date, until)

    types_traceJobParameters = [basestring, [basestring, int, long, list],
                                [list, None], [list, None],
                                [basestring, None], [basestring, None]]

    @staticmethod
    def export_traceJobParameters(site, localID, parameterList, attributeList, date, until):
        """Delegate to JobDB.traceJobParameters with the arguments as given."""
        return gJobDB.traceJobParameters(site, localID, parameterList, attributeList, date, until)

    types_getAtticJobParameters = [[int, long]]

    @staticmethod
    def export_getAtticJobParameters(jobID, parameters=None, rescheduleCycle=-1):
        """Return the attic parameters of the given job.

        :param parameters: parameter names; an empty list is passed to the
                           database when none are given
        :param rescheduleCycle: forwarded unchanged to the database
        """
        if not parameters:
            parameters = []
        return gJobDB.getAtticJobParameters(jobID, parameters, rescheduleCycle)

    types_getJobAttributes = [int]

    @staticmethod
    def export_getJobAttributes(jobID):
        """Return the attributes of the given job."""
        return gJobDB.getJobAttributes(jobID)

    types_getJobAttribute = [int, basestring]

    @staticmethod
    def export_getJobAttribute(jobID, attribute):
        """Return a single attribute of the given job."""
        return gJobDB.getJobAttribute(jobID, attribute)

    types_getSiteSummary = []

    @staticmethod
    def export_getSiteSummary():
        """Return the site summary provided by the JobDB."""
        return gJobDB.getSiteSummary()

    types_getJobHeartBeatData = [int]

    @staticmethod
    def export_getJobHeartBeatData(jobID):
        """Return the heart beat data stored for the given job."""
        return gJobDB.getHeartBeatData(jobID)

    types_getInputData = [[int, long]]

    @staticmethod
    def export_getInputData(jobID):
        """Return the input data of the specified job."""
        return gJobDB.getInputData(jobID)

    types_getOwnerGroup = []

    @staticmethod
    def export_getOwnerGroup():
        """Return the distinct values of the OwnerGroup job attribute."""
        return gJobDB.getDistinctJobAttributes('OwnerGroup')
class JobManagerHandler(RequestHandler):
    """Service handler for job submission and job state manipulation.

    Each ``export_<name>`` method is accompanied by a ``types_<name>`` list
    giving the expected types of its positional arguments (presumably
    consumed by the RequestHandler framework -- confirm).
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """Create the message client and connect it to the OptimizationMind.

        The result of the connection attempt is returned, so a failed
        connection is reported to the caller of the initialization.
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        result = cls.msgClient.connect(JobManager=True)
        if not result['OK']:
            cls.log.error("Cannot connect to OptimizationMind!", result['Message'])
        return result

    def initialize(self):
        """Set up per-request state from the credentials of the caller."""
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        return S_OK()

    def __sendNewJobsToMind(self, jids):
        """Send an "OptimizeJobs" message carrying the given job IDs.

        Failures are logged and otherwise ignored; nothing is returned.
        """
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = jids
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return

    types_submitJob = [StringType]

    def export_submitJob(self, jobDesc):
        """Submit a job, or a set of parametric jobs, described by a JDL.

        :param jobDesc: job description in JDL; enclosing square brackets
                        are added when missing

        A JDL with a ``Parameters`` attribute is expanded into one job per
        parameter value. ``Parameters`` is either an explicit list or a
        number of values generated from ``ParameterStart``,
        ``ParameterStep`` and ``ParameterFactor``.

        :return: S_OK with the new job ID (a list of IDs for a parametric
                 job), also stored under 'JobID', plus the
                 'requireProxyUpload' flag; S_ERROR otherwise
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR('Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR('Job submission not authorized')

        # jobDesc is JDL for now
        jobDesc = jobDesc.strip()
        if not jobDesc:
            # An empty description would otherwise fail on jobDesc[0] below
            return S_ERROR('Empty job description')
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        parametricJob = False
        if jobClassAd.lookupAttribute('Parameters'):
            parametricJob = True
            if jobClassAd.isAttributeList('Parameters'):
                parameterList = jobClassAd.getListFromExpression('Parameters')
            else:
                pStep = 0
                pFactor = 1
                pStart = 1
                nParameters = jobClassAd.getAttributeInt('Parameters')
                if not nParameters:
                    value = jobClassAd.get_expression('Parameters')
                    return S_ERROR('Illegal value for Parameters JDL field: %s' % value)

                if jobClassAd.lookupAttribute('ParameterStart'):
                    value = jobClassAd.get_expression('ParameterStart').replace('"', '')
                    # Accept an integer first, then fall back to a float
                    try:
                        pStart = int(value)
                    except ValueError:
                        try:
                            pStart = float(value)
                        except ValueError:
                            return S_ERROR('Illegal value for ParameterStart JDL field: %s' % value)

                if jobClassAd.lookupAttribute('ParameterStep'):
                    pStep = jobClassAd.getAttributeInt('ParameterStep')
                    if not pStep:
                        pStep = jobClassAd.getAttributeFloat('ParameterStep')
                        if not pStep:
                            value = jobClassAd.get_expression('ParameterStep')
                            return S_ERROR('Illegal value for ParameterStep JDL field: %s' % value)

                if jobClassAd.lookupAttribute('ParameterFactor'):
                    pFactor = jobClassAd.getAttributeInt('ParameterFactor')
                    if not pFactor:
                        pFactor = jobClassAd.getAttributeFloat('ParameterFactor')
                        if not pFactor:
                            value = jobClassAd.get_expression('ParameterFactor')
                            return S_ERROR('Illegal value for ParameterFactor JDL field: %s' % value)

                # Each value is derived from the previous one:
                # next = previous * pFactor + pStep
                parameterList = [pStart]
                for i in range(nParameters - 1):
                    parameterList.append(parameterList[i] * pFactor + pStep)

            if len(parameterList) > self.maxParametricJobs:
                return S_ERROR('The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs)

            jobDescList = []
            nParam = len(parameterList) - 1
            for n, p in enumerate(parameterList):
                # '%s' is replaced by the parameter value, '%n' by the
                # parameter number zero-padded to the width of the last one
                newJobDesc = jobDesc.replace('%s', str(p)).replace('%n', str(n).zfill(len(str(nParam))))
                newClassAd = ClassAd(newJobDesc)
                for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']:
                    newClassAd.deleteAttribute(attr)
                if type(p) == type(' ') and p.startswith('{'):
                    # NOTE(review): insertAttributeInt is used for values
                    # starting with '{', presumably to insert them unquoted
                    # -- confirm against ClassAd
                    newClassAd.insertAttributeInt('Parameter', str(p))
                else:
                    newClassAd.insertAttributeString('Parameter', str(p))
                newClassAd.insertAttributeInt('ParameterNumber', n)
                newJDL = newClassAd.asJDL()
                jobDescList.append(newJDL)
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN,
                                               self.ownerGroup, self.diracSetup)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))

            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')

            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        self.__sendNewJobsToMind(jobIDList)
        return result

    def __checkIfProxyUploadIsRequired(self):
        """Tell whether the user has to upload a proxy.

        Returns True when the proxy check itself fails, otherwise True only
        if the ProxyManager reports that no proxy valid for 18000 seconds
        is available.
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return result['Value'] == False

    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """Make job with jobID invalid, e.g. because of the sandbox submission errors.

        Not implemented: the method does nothing.
        """
        pass

    def __get_job_list(self, jobInput):
        """Evaluate jobInput into a list of ints.

        Accepts an int, a string or a list; an empty list is returned for
        any other type or when a value cannot be converted to int.
        """
        if type(jobInput) == IntType:
            return [jobInput]
        if type(jobInput) == StringType:
            try:
                ijob = int(jobInput)
                return [ijob]
            except ValueError:
                return []
        if type(jobInput) == ListType:
            try:
                ljob = [int(x) for x in jobInput]
                return ljob
            except (TypeError, ValueError, OverflowError):
                return []

        return []

    def __evaluate_rights(self, jobList, right):
        """Sort the jobs of jobList according to the rights of the caller.

        :return: tuple (validJobList, invalidJobList, nonauthJobList,
                 ownerJobList): jobs for which ``right`` is granted, jobs
                 whose rights could not be evaluated, jobs for which the
                 right is denied, and jobs owned by the caller
        """
        self.jobPolicy.setJobDB(gJobDB)
        validJobList = []
        invalidJobList = []
        nonauthJobList = []
        ownerJobList = []
        for jobID in jobList:
            result = self.jobPolicy.getUserRightsForJob(jobID)
            if result['OK']:
                if result['Value'][right]:
                    validJobList.append(jobID)
                else:
                    nonauthJobList.append(jobID)
                if result['UserIsOwner']:
                    ownerJobList.append(jobID)
            else:
                invalidJobList.append(jobID)

        return validJobList, invalidJobList, nonauthJobList, ownerJobList

    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """Reschedule the jobs specified in jobIDs.

        :param jobIDs: a job ID (int or string) or a list of job IDs

        Processing stops at the first job that fails to be rescheduled and
        that error is returned. When some jobs are invalid or not
        authorized, an error listing them is returned after the permitted
        jobs have been rescheduled.
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_RESCHEDULE)
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            # NOTE(review): the rescheduled jobs are not announced to the
            # OptimizationMind on this path, unlike in export_resetJob --
            # confirm this is intended
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        self.__sendNewJobsToMind(validJobList)
        return result

    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """Delete the jobs specified in jobIDs.

        :param jobIDs: a job ID (int or string) or a list of job IDs

        :return: S_OK with the list of permitted job IDs, or S_ERROR
                 carrying 'InvalidJobIDs', 'NonauthorizedJobIDs' and
                 'FailedJobIDs' when any job could not be deleted
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_DELETE)

        bad_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
            if not result['OK']:
                bad_ids.append(jobID)
            result = gtaskQueueDB.deleteJob(jobID)
            if not result['OK']:
                gLogger.warn('Failed to delete job from the TaskQueue')

        # Jobs whose status could not be set are reported as failures too
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    types_killJob = []

    def export_killJob(self, jobIDs):
        """Kill the jobs specified in jobIDs.

        :param jobIDs: a job ID (int or string) or a list of job IDs

        :return: S_OK with the list of permitted job IDs, or S_ERROR
                 carrying 'InvalidJobIDs', 'NonauthorizedJobIDs' and
                 'FailedJobIDs' when any job could not be killed
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_KILL)

        bad_ids = []
        for jobID in validJobList:
            # kill jobID
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                bad_ids.append(jobID)
            else:
                gLogger.info('Job %d is marked for termination' % jobID)
                result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
                if not result['OK']:
                    gLogger.warn('Failed to set job status')
                result = gtaskQueueDB.deleteJob(jobID)
                if not result['OK']:
                    gLogger.warn('Failed to delete job from the TaskQueue')

        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """Reset the jobs specified in jobIDs.

        The RescheduleCounter of every permitted job is set to -1 and the
        job is rescheduled; successfully reset jobs are announced to the
        OptimizationMind.

        :param jobIDs: a job ID (int or string) or a list of job IDs

        :return: S_OK, or S_ERROR carrying 'InvalidJobIDs',
                 'NonauthorizedJobIDs' and 'FailedJobIDs' when any job could
                 not be reset
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_RESET)

        bad_ids = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                bad_ids.append(jobID)
            else:
                gtaskQueueDB.deleteJob(jobID)
                result = gJobDB.rescheduleJob(jobID)
                if not result['OK']:
                    bad_ids.append(jobID)
                else:
                    good_ids.append(jobID)
                    gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                                   application='Unknown', source='JobManager')

        self.__sendNewJobsToMind(good_ids)
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """Service handler for submitting, rescheduling, killing, deleting and resetting jobs.

    Every exported method returns the S_OK / S_ERROR structure used throughout this
    file: a dictionary carrying an 'OK' flag plus either 'Value' or 'Message'.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """Create the OptimizationMind message client and keep it connected.

        A first connection attempt is made immediately and `__connectToOptMind` is
        registered with the thread scheduler as a periodic task (period argument 60).

        :param serviceInfoDict: service information passed by the framework (unused here)
        :return: S_OK
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """Connect the message client if it is not connected; only warn on failure."""
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result['OK']:
                cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

    def initialize(self):
        """Cache the caller's credentials and build the per-request job policy.

        :return: S_OK
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """Send an "OptimizeJobs" message carrying the sorted job IDs.

        Nothing is sent while the message client is disconnected; failures to create
        or send the message are logged and otherwise ignored.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = list(sorted(jids))
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [StringTypes]

    def export_submitJob(self, jobDesc):
        """Submit a single or parametric job described by a JDL string.

        :param jobDesc: job description (JDL); enclosing square brackets are added
                        when missing
        :return: S_OK with the new job ID (single job) or the list of new job IDs
                 (parametric job), also stored under 'JobID', plus a
                 'requireProxyUpload' flag; S_ERROR otherwise
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR('Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR('Job submission not authorized')

        # jobDesc is JDL for now
        jobDesc = jobDesc.strip()
        if not jobDesc:
            # Indexing an empty string below would raise IndexError
            return S_ERROR('Empty job description')
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        nParameters = getNumberOfParameters(jobClassAd)
        parametricJob = False
        if nParameters > 0:
            parametricJob = True
            # Enforce the configured limit before any job description is generated
            if nParameters > self.maxParametricJobs:
                return S_ERROR('Number of parametric jobs exceeds the limit of %d' %
                               self.maxParametricJobs)
            result = generateParametricJobs(jobClassAd)
            if not result['OK']:
                return result
            jobDescList = result['Value']
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN,
                                               self.ownerGroup, self.diracSetup)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' %
                         (jobID, self.ownerDN, self.ownerGroup))

            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'],
                                           source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """Tell whether the caller should upload a proxy.

        :return: True when the proxy check fails or reports False, False otherwise
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return result['Value'] == False

    ###########################################################################
    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """Make job with jobID invalid, e.g. because of the sandbox submission errors.

        Currently a placeholder: the method does nothing and returns None.
        """
        pass

    ###########################################################################
    def __get_job_list(self, jobInput):
        """Evaluate the jobInput into a list of ints.

        :param jobInput: a job ID as int or string, or a list of such values
        :return: list of int job IDs; empty when the input cannot be converted
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, list):
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """Reschedule the given job(s).

        Processing stops at the first job whose rescheduling fails and that failure
        is returned as is.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with a 'requireProxyUpload' flag, or
                 S_ERROR carrying 'InvalidJobIDs' / 'NonauthorizedJobIDs' where relevant
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(validJobList)
        return result

    def __deleteJob(self, jobID):
        """Delete one job.

        The job status is set to 'Deleted' and the job is removed from the task
        queue; a task queue failure is only logged.

        :param jobID: job ID
        :return: S_OK, or the failed status update result
        """
        result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
        if not result['OK']:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        return S_OK()

    def __killJob(self, jobID, sendKillCommand=True):
        """Kill one job.

        :param jobID: job ID
        :param sendKillCommand: when True a 'Kill' command is registered for the job
                                first; failing to do so aborts the operation
        :return: S_OK, or the failed setJobCommand result
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                return result

        gLogger.info('Job %d is marked for termination' % jobID)
        # Failures below are logged but do not fail the kill request
        result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
        if not result['OK']:
            gLogger.warn('Failed to set job Killed status')
        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """Kill or delete jobs as necessary, depending on their current status.

        :param jobIDList: a job ID (int or string) or a list of job IDs
        :param right: RIGHT_KILL or RIGHT_DELETE, the right evaluated for the caller
        :return: S_OK(list of valid job IDs) with 'requireProxyUpload' and, if any,
                 'InvalidJobIDs'; S_ERROR with 'NonauthorizedJobIDs' / 'FailedJobIDs'
        """
        jobList = self.__get_job_list(jobIDList)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
        if not result['OK']:
            return result
        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result['Value'].items():
            status = sDict['Status']
            if status in ['Running', 'Matched', 'Stalled']:
                killJobList.append(jobID)
            elif status in ['Done', 'Failed']:
                # Finished jobs are left untouched by a kill request
                if not right == RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                markKilledJobList.append(jobID)
            if status in ['Staging']:
                stagingJobList.append(jobID)

        bad_ids = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result['OK']:
                bad_ids.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)

        if stagingJobList:
            stagerClient = StorageManagerClient()
            gLogger.info('Going to send killing signal to stager as well!')
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result['OK']:
                gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

        if nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()

        if invalidJobList:
            result['InvalidJobIDs'] = invalidJobList

        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """Delete jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK / S_ERROR as produced by `__kill_delete_jobs`
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """Kill jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK / S_ERROR as produced by `__kill_delete_jobs`
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """Reset jobs specified in the jobIDs list.

        For each authorized job the 'RescheduleCounter' attribute is set to -1, the
        job is removed from the task queue and rescheduled. Successfully reset jobs
        are sent to the OptimizationMind even when other jobs failed.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK with a 'requireProxyUpload' flag, or S_ERROR carrying
                 'InvalidJobIDs' / 'NonauthorizedJobIDs' / 'FailedJobIDs'
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)

        bad_ids = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                bad_ids.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)
            else:
                good_ids.append(jobID)
            # NOTE(review): the logging record is written even when rescheduling failed
            # and relies on the result still carrying 'JobID'/'Status'/'MinorStatus' -
            # confirm against rescheduleJob
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        self.__sendJobsToOptimizationMind(good_ids)
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """RequestHandler implementation of the JobManager.

    Every exported method returns the S_OK / S_ERROR structure used throughout this
    file: a dictionary carrying an 'OK' flag plus either 'Value' or 'Message'.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """Create the OptimizationMind message client and keep it connected.

        A first connection attempt is made immediately and `__connectToOptMind` is
        registered with the thread scheduler as a periodic task (period argument 60).

        :param serviceInfoDict: service information passed by the framework (unused here)
        :return: S_OK
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """Connect the message client if it is not connected; only warn on failure."""
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result['OK']:
                cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

    def initialize(self):
        """Cache the caller's credentials and build the per-request job policy.

        :return: S_OK
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """Send an "OptimizeJobs" message carrying the sorted job IDs.

        Nothing is sent while the message client is disconnected; failures to create
        or send the message are logged and otherwise ignored.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = list(sorted(jids))
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [basestring]

    def export_submitJob(self, jobDesc):
        """Submit a job to DIRAC WMS.

        The job can be a single job, or a parametric job. If it is a parametric job,
        then the parameters will need to be unpacked. Parametric jobs are inserted in
        'Submitting' status and wait for `export_confirmBulkSubmission`; a single job
        is inserted in 'Received' status and sent to the OptimizationMind right away.

        :param str jobDesc: job description JDL (of a single or parametric job)
        :return: S_OK/S_ERROR; on S_OK the value is the new job ID (single job) or
                 the list of newly created job IDs (parametric job), also stored under
                 'JobID', together with a 'requireProxyUpload' flag
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR(EWMSSUBM, 'Job submission not authorized')

        # jobDesc is JDL for now
        jobDesc = jobDesc.strip()
        if not jobDesc:
            # Indexing an empty string below would raise IndexError
            return S_ERROR(EWMSJDL, 'Empty job description')
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        result = getParameterVectorLength(jobClassAd)
        if not result['OK']:
            return result
        nJobs = result['Value']
        parametricJob = False
        if nJobs > 0:
            # if we are here, then jobDesc was the description of a parametric job.
            # So we start unpacking
            parametricJob = True
            if nJobs > self.maxParametricJobs:
                return S_ERROR(EWMSJDL,
                               "Number of parametric jobs exceeds the limit of %d" %
                               self.maxParametricJobs)
            result = generateParametricJobs(jobClassAd)
            if not result['OK']:
                return result
            jobDescList = result['Value']
        else:
            # if we are here, then jobDesc was the description of a single job.
            jobDescList = [jobDesc]

        jobIDList = []

        if parametricJob:
            initialStatus = 'Submitting'
            initialMinorStatus = 'Bulk transaction confirmation'
        else:
            initialStatus = 'Received'
            initialMinorStatus = 'Job accepted'

        # jobDescList because there might be a list generated by a parametric job
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(jobDescription,
                                               self.owner,
                                               self.ownerDN,
                                               self.ownerGroup,
                                               self.diracSetup,
                                               initialStatus=initialStatus,
                                               initialMinorStatus=initialMinorStatus)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' %
                         (jobID, self.ownerDN, self.ownerGroup))

            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'],
                                           source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        # Parametric jobs are sent on bulk confirmation; a single job is already
        # 'Received' and has no later confirmation step, so send it now.
        if not parametricJob:
            self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    types_confirmBulkSubmission = [list]

    def export_confirmBulkSubmission(self, jobIDs):
        """Confirm the possibility to proceed with processing of the given jobs.

        All requested jobs must pass the RIGHT_SUBMIT evaluation. If every job is
        already in an active status the list is returned unchanged; otherwise every
        job must be in 'Submitting' status and all are moved to 'Received' /
        'Job accepted' and sent to the OptimizationMind.

        :param jobIDs: list of job IDs
        :return: S_OK(confirmed job IDs) or S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs))

        validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_SUBMIT)

        # Check that all the requested jobs are eligible
        if set(jobList) != set(validJobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus'])
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
        jobStatusDict = result['Value']

        # Check if the jobs are already activated
        activeStatuses = ["Received", "Checking", "Waiting", "Matched", "Running"]
        jobEnabledList = [jobID for jobID in jobList
                          if jobStatusDict[jobID]['Status'] in activeStatuses]
        if set(jobEnabledList) == set(jobList):
            return S_OK(jobList)

        # Check that requested job are in Submitting status
        jobUpdateStatusList = [jobID for jobID in jobList
                               if jobStatusDict[jobID]['Status'] == "Submitting"]
        if set(jobUpdateStatusList) != set(jobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        # Update status of all the requested jobs in one transaction
        result = gJobDB.setJobAttributes(jobUpdateStatusList,
                                         ['Status', 'MinorStatus'],
                                         ['Received', 'Job accepted'])
        if not result['OK']:
            return result

        self.__sendJobsToOptimizationMind(jobUpdateStatusList)
        return S_OK(jobUpdateStatusList)

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """Tell whether the caller should upload a proxy.

        :return: True when the proxy check fails or reports a false value
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return not result['Value']

    ###########################################################################
    def __getJobList(self, jobInput):
        """Evaluate the jobInput into a list of ints.

        :param jobInput: one or more job IDs in int or str form
        :type jobInput: str or int or list
        :return: a list of int job IDs; empty when the input cannot be converted
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, list):
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """Reschedule the given job(s).

        Processing stops at the first job whose rescheduling fails and that failure
        is returned as is.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with a 'requireProxyUpload' flag, or
                 S_ERROR carrying 'InvalidJobIDs' / 'NonauthorizedJobIDs' where relevant
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(validJobList)
        return result

    @staticmethod
    def __extractPilotReferences(pilotInfo):
        """Collect the 'PilotJobReference' entries from a getPilotInfo payload.

        NOTE(review): the payload shape is not visible in this file. A single pilot
        dictionary, a dictionary of pilot dictionaries and a list of pilot
        dictionaries are all accepted - confirm against PilotAgentsDB.getPilotInfo.

        :param pilotInfo: the 'Value' of a successful getPilotInfo call
        :return: list of pilot job references (possibly empty)
        """
        if isinstance(pilotInfo, dict):
            if 'PilotJobReference' in pilotInfo:
                return [pilotInfo['PilotJobReference']]
            pilotInfo = pilotInfo.values()
        return [info['PilotJobReference'] for info in pilotInfo]

    def __deleteJob(self, jobID):
        """Delete one job.

        The job status is set to 'Deleted' and the job is removed from the task
        queue (a failure there is only logged). Every pilot of the job that has no
        jobs left is then deleted, together with its pilots logging records when
        `enablePilotsLogging` is set.

        :param jobID: job ID
        :return: S_OK, or the first failing result
        """
        result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
        if not result['OK']:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        # if it was the last job for the pilot, clear PilotsLogging about it
        result = gPilotAgentsDB.getPilotsForJobID(jobID)
        if not result['OK']:
            gLogger.error("Failed to get Pilots for JobID", result['Message'])
            return result
        for pilot in result['Value']:
            res = gPilotAgentsDB.getJobsForPilot(pilot['PilotID'])
            if not res['OK']:
                gLogger.error("Failed to get jobs for pilot", res['Message'])
                return res
            if res['Value']:
                continue
            # if list of jobs for pilot is empty, delete pilot and pilotslogging
            infoResult = gPilotAgentsDB.getPilotInfo(pilotID=pilot['PilotID'])
            if not infoResult['OK']:
                gLogger.error("Failed to get pilot info", infoResult['Message'])
                return infoResult
            # The payload lives under 'Value'; indexing the result structure itself
            # with [0] can never succeed.
            pilotRefs = self.__extractPilotReferences(infoResult['Value'])
            ret = gPilotAgentsDB.deletePilot(pilot['PilotID'])
            if not ret['OK']:
                gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message'])
                return ret
            if enablePilotsLogging:
                for pilotRef in pilotRefs:
                    ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef)
                    if not ret['OK']:
                        gLogger.error("Failed to delete pilot logging from PilotAgentsDB",
                                      ret['Message'])
                        return ret

        return S_OK()

    def __killJob(self, jobID, sendKillCommand=True):
        """Kill one job.

        :param jobID: job ID
        :param sendKillCommand: when True a 'Kill' command is registered for the job
                                first; failing to do so aborts the operation
        :return: S_OK, or the failed setJobCommand result
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                return result

        gLogger.info('Job %d is marked for termination' % jobID)
        # Failures below are logged but do not fail the kill request
        result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
        if not result['OK']:
            gLogger.warn('Failed to set job Killed status', result['Message'])
        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue', result['Message'])

        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """Kill or delete jobs as necessary, depending on their current status.

        :param jobIDList: a job ID (int or string) or a list of job IDs
        :param right: RIGHT_KILL or RIGHT_DELETE, the right evaluated for the caller
        :return: S_OK(list of valid job IDs) with 'requireProxyUpload' and, if any,
                 'InvalidJobIDs'; S_ERROR with 'NonauthorizedJobIDs' / 'FailedJobIDs'
        """
        jobList = self.__getJobList(jobIDList)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
        if not result['OK']:
            return result
        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result['Value'].items():
            status = sDict['Status']
            if status in ['Running', 'Matched', 'Stalled']:
                killJobList.append(jobID)
            elif status in ['Done', 'Failed', 'Killed']:
                # Finished jobs are left untouched by a kill request
                if not right == RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                markKilledJobList.append(jobID)
            if status in ['Staging']:
                stagingJobList.append(jobID)

        badIDs = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        if stagingJobList:
            stagerClient = StorageManagerClient()
            gLogger.info('Going to send killing signal to stager as well!')
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result['OK']:
                gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

        if nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed deletion')
            if nonauthJobList:
                gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                gLogger.warn("JobIDs failed to be deleted", str(badIDs))
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()

        if invalidJobList:
            result['InvalidJobIDs'] = invalidJobList

        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """Delete jobs specified in the jobIDs list.

        :param jobIDs: list of job IDs
        :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """Kill jobs specified in the jobIDs list.

        :param jobIDs: list of job IDs
        :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """Reset jobs specified in the jobIDs list.

        For each authorized job the 'RescheduleCounter' attribute is set to -1, the
        job is removed from the task queue and rescheduled. Successfully reset jobs
        are sent to the OptimizationMind even when other jobs failed.

        :param jobIDs: list of job IDs
        :return: S_OK/S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)

        badIDs = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                badIDs.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)
            else:
                good_ids.append(jobID)
            # NOTE(review): the logging record is written even when rescheduling failed
            # and relies on the result still carrying 'JobID'/'Status'/'MinorStatus' -
            # confirm against rescheduleJob
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        self.__sendJobsToOptimizationMind(good_ids)
        if invalidJobList or nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """Service handler for submitting, rescheduling, killing, deleting and resetting jobs.

    Every exported method returns the S_OK / S_ERROR structure used throughout this
    file: a dictionary carrying an 'OK' flag plus either 'Value' or 'Message'.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """Create the OptimizationMind message client and keep it connected.

        A first connection attempt is made immediately and `__connectToOptMind` is
        registered with the thread scheduler as a periodic task (period argument 60).

        :param serviceInfoDict: service information passed by the framework (unused here)
        :return: S_OK
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """Connect the message client if it is not connected; only warn on failure."""
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result['OK']:
                cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

    def initialize(self):
        """Cache the caller's credentials and build the per-request job policy.

        :return: S_OK
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendNewJobsToMind(self, jids):
        """Send an "OptimizeJobs" message carrying the given job IDs.

        Nothing is sent while the message client is disconnected; failures to create
        or send the message are logged and otherwise ignored.

        :param jids: list of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = jids
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [StringType]

    def export_submitJob(self, jobDesc):
        """Submit a single or parametric job described by a JDL string.

        A JDL with a 'Parameters' attribute is expanded into one job per parameter.
        The parameters are either listed explicitly or generated from a count and the
        optional 'ParameterStart', 'ParameterStep' and 'ParameterFactor' attributes
        (next = previous * factor + step). In each generated description '%s' is
        replaced by the parameter value and '%n' by its zero-padded index.

        :param jobDesc: job description (JDL); enclosing square brackets are added
                        when missing
        :return: S_OK with the new job ID (single job) or the list of new job IDs
                 (parametric job), also stored under 'JobID', plus a
                 'requireProxyUpload' flag; S_ERROR otherwise
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR('Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR('Job submission not authorized')

        # jobDesc is JDL for now
        jobDesc = jobDesc.strip()
        if not jobDesc:
            # Indexing an empty string below would raise IndexError
            return S_ERROR('Empty job description')
        if jobDesc[0] != "[":
            jobDesc = "[%s" % jobDesc
        if jobDesc[-1] != "]":
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        parametricJob = False
        if jobClassAd.lookupAttribute('Parameters'):
            parametricJob = True
            if jobClassAd.isAttributeList('Parameters'):
                parameterList = jobClassAd.getListFromExpression('Parameters')
            else:
                pStep = 0
                pFactor = 1
                pStart = 1
                nParameters = jobClassAd.getAttributeInt('Parameters')
                if not nParameters:
                    value = jobClassAd.get_expression('Parameters')
                    return S_ERROR('Illegal value for Parameters JDL field: %s' % value)

                if jobClassAd.lookupAttribute('ParameterStart'):
                    value = jobClassAd.get_expression('ParameterStart').replace('"', '')
                    try:
                        pStart = int(value)
                    except ValueError:
                        try:
                            pStart = float(value)
                        except ValueError:
                            return S_ERROR(
                                'Illegal value for ParameterStart JDL field: %s' % value)

                if jobClassAd.lookupAttribute('ParameterStep'):
                    pStep = jobClassAd.getAttributeInt('ParameterStep')
                    if not pStep:
                        pStep = jobClassAd.getAttributeFloat('ParameterStep')
                        if not pStep:
                            value = jobClassAd.get_expression('ParameterStep')
                            return S_ERROR(
                                'Illegal value for ParameterStep JDL field: %s' % value)
                if jobClassAd.lookupAttribute('ParameterFactor'):
                    pFactor = jobClassAd.getAttributeInt('ParameterFactor')
                    if not pFactor:
                        pFactor = jobClassAd.getAttributeFloat('ParameterFactor')
                        if not pFactor:
                            value = jobClassAd.get_expression('ParameterFactor')
                            return S_ERROR(
                                'Illegal value for ParameterFactor JDL field: %s' % value)

                # Reject an oversized request before materialising the list: the count
                # comes straight from the submitted JDL.
                if nParameters > self.maxParametricJobs:
                    return S_ERROR('The number of parametric jobs exceeded the limit of %d' %
                                   self.maxParametricJobs)

                parameterList = [pStart]
                for i in range(nParameters - 1):
                    parameterList.append(parameterList[i] * pFactor + pStep)

            if len(parameterList) > self.maxParametricJobs:
                return S_ERROR('The number of parametric jobs exceeded the limit of %d' %
                               self.maxParametricJobs)

            jobDescList = []
            nParam = len(parameterList) - 1
            indexWidth = len(str(nParam))
            for n, p in enumerate(parameterList):
                newJobDesc = jobDesc.replace('%s', str(p)).replace(
                    '%n', str(n).zfill(indexWidth))
                newClassAd = ClassAd(newJobDesc)
                for attr in ['Parameters', 'ParameterStep', 'ParameterFactor']:
                    newClassAd.deleteAttribute(attr)
                # NOTE(review): values starting with '{' go through insertAttributeInt,
                # presumably so they are stored unquoted - confirm against ClassAd
                if isinstance(p, str) and p.startswith('{'):
                    newClassAd.insertAttributeInt('Parameter', str(p))
                else:
                    newClassAd.insertAttributeString('Parameter', str(p))
                newClassAd.insertAttributeInt('ParameterNumber', n)
                newJDL = newClassAd.asJDL()
                jobDescList.append(newJDL)
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN,
                                               self.ownerGroup, self.diracSetup)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' %
                         (jobID, self.ownerDN, self.ownerGroup))

            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'],
                                           source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        self.__sendNewJobsToMind(jobIDList)
        return result

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """Tell whether the caller should upload a proxy.

        :return: True when the proxy check fails or reports False, False otherwise
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return result['Value'] == False

    ###########################################################################
    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """Make job with jobID invalid, e.g. because of the sandbox submission errors.

        Currently a placeholder: the method does nothing and returns None.
        """
        pass

    ###########################################################################
    def __get_job_list(self, jobInput):
        """Evaluate the jobInput into a list of ints.

        :param jobInput: a job ID as int or string, or a list of such values
        :return: list of int job IDs; empty when the input cannot be converted
        """
        if isinstance(jobInput, IntType):
            return [jobInput]
        if isinstance(jobInput, StringType):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, ListType):
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """Reschedule the given job(s).

        Processing stops at the first job whose rescheduling fails and that failure
        is returned as is.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with a 'requireProxyUpload' flag, or
                 S_ERROR carrying 'InvalidJobIDs' / 'NonauthorizedJobIDs' where relevant
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESCHEDULE)
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            # The message used to say "deletion", copied from the kill/delete path
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        self.__sendNewJobsToMind(validJobList)
        return result

    def __deleteJob(self, jobID):
        """Delete one job.

        The job status is set to 'Deleted' and the job is removed from the task
        queue; a task queue failure is only logged.

        :param jobID: job ID
        :return: S_OK, or the failed status update result
        """
        result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
        if not result['OK']:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        return S_OK()

    def __killJob(self, jobID):
        """Kill one job.

        A 'Kill' command is registered for the job; failing to do so aborts the
        operation. Later failures are only logged.

        :param jobID: job ID
        :return: S_OK, or the failed setJobCommand result
        """
        result = gJobDB.setJobCommand(jobID, 'Kill')
        if not result['OK']:
            return result

        gLogger.info('Job %d is marked for termination' % jobID)
        result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
        if not result['OK']:
            gLogger.warn('Failed to set job Killed status')
        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """Kill or delete jobs as necessary, depending on their current status.

        :param jobIDList: a job ID (int or string) or a list of job IDs
        :param right: RIGHT_KILL or RIGHT_DELETE, the right evaluated for the caller
        :return: S_OK(list of valid job IDs) with a 'requireProxyUpload' flag, or
                 S_ERROR with 'InvalidJobIDs' / 'NonauthorizedJobIDs' / 'FailedJobIDs'
        """
        jobList = self.__get_job_list(jobIDList)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
        if not result['OK']:
            return result
        killJobList = []
        deleteJobList = []
        for jobID, sDict in result['Value'].items():
            status = sDict['Status']
            if status in ['Running', 'Matched', 'Stalled']:
                killJobList.append(jobID)
            elif status in ['Done', 'Failed']:
                # Finished jobs are left untouched by a kill request
                if not right == RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                deleteJobList.append(jobID)

        bad_ids = []
        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)

        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """Delete jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK / S_ERROR as produced by `__kill_delete_jobs`
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """Kill jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK / S_ERROR as produced by `__kill_delete_jobs`
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """Reset jobs specified in the jobIDs list.

        For each authorized job the 'RescheduleCounter' attribute is set to -1, the
        job is removed from the task queue and rescheduled. Successfully reset jobs
        are sent to the OptimizationMind even when other jobs failed.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK with a 'requireProxyUpload' flag, or S_ERROR carrying
                 'InvalidJobIDs' / 'NonauthorizedJobIDs' / 'FailedJobIDs'
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = \
            self.jobPolicy.evaluateJobRights(jobList, RIGHT_RESET)

        bad_ids = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                bad_ids.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)
            else:
                good_ids.append(jobID)
            # NOTE(review): the logging record is written even when rescheduling failed
            # and relies on the result still carrying 'JobID'/'Status'/'MinorStatus' -
            # confirm against rescheduleJob
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'],
                                           result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        self.__sendNewJobsToMind(good_ids)
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and \
            self.__checkIfProxyUploadIsRequired()
        return result
class JobMonitoringHandler(RequestHandler):
    """Service handler exposing job monitoring queries.

    Each ``export_<name>`` method forwards to one of the module-level
    database objects (``gJobDB``, ``gJobLoggingDB``, ``gTaskQueueDB``) and
    returns a result dictionary carrying an ``'OK'`` flag plus either
    ``'Value'`` or ``'Message'``.  The ``types_<name>`` class attributes
    list types for the leading arguments of the matching method
    (presumably consumed by ``RequestHandler`` for argument checking --
    confirm against the base class).
    """

    def initialize(self):
        """Store the caller's credentials and build the job access policy."""
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        operations = Operations(group=self.ownerGroup)
        self.globalJobsInfo = operations.getValue(
            '/Services/JobMonitoring/GlobalJobsInfo', True)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup,
                                   self.globalJobsInfo)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    ##############################################################################
    types_getApplicationStates = []

    @staticmethod
    def export_getApplicationStates():
        """ Return Distinct Values of ApplicationStatus job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('ApplicationStatus')

    ##############################################################################
    types_getJobTypes = []

    @staticmethod
    def export_getJobTypes():
        """ Return Distinct Values of JobType job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('JobType')

    ##############################################################################
    types_getOwners = []

    @staticmethod
    def export_getOwners():
        """ Return Distinct Values of Owner job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('Owner')

    ##############################################################################
    types_getProductionIds = []

    @staticmethod
    def export_getProductionIds():
        """ Return Distinct Values of ProductionId job Attribute in WMS
            (queried as the 'JobGroup' attribute)
        """
        return gJobDB.getDistinctJobAttributes('JobGroup')

    ##############################################################################
    types_getJobGroups = []

    @staticmethod
    def export_getJobGroups(condDict=None, cutDate=None):
        """ Return Distinct Values of JobGroup job Attribute in WMS,
            restricted by condDict and, through the 'newer' argument, cutDate
        """
        return gJobDB.getDistinctJobAttributes('JobGroup', condDict,
                                               newer=cutDate)

    ##############################################################################
    types_getSites = []

    @staticmethod
    def export_getSites():
        """ Return Distinct Values of Site job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('Site')

    ##############################################################################
    types_getStates = []

    @staticmethod
    def export_getStates():
        """ Return Distinct Values of Status job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('Status')

    ##############################################################################
    types_getMinorStates = []

    @staticmethod
    def export_getMinorStates():
        """ Return Distinct Values of Minor Status job Attribute in WMS
        """
        return gJobDB.getDistinctJobAttributes('MinorStatus')

    ##############################################################################
    types_getJobs = []

    @staticmethod
    def export_getJobs(attrDict=None, cutDate=None):
        """ Return list of JobIds matching the condition given in attrDict
        """
        # NOTE: attrDict is handed to the database as received; no attribute
        # whitelist is applied here.
        return gJobDB.selectJobs(attrDict, newer=cutDate)

    ##############################################################################
    types_getCounters = [ListType]

    @staticmethod
    def export_getCounters(attrList, attrDict=None, cutDate=''):
        """
        Retrieve list of distinct attributes values from attrList
        with attrDict as condition.
        For each set of distinct values, count number of occurrences.
        Return a list. Each item is a list with 2 items, the list of distinct
        attribute values and the counter
        """
        # NOTE: neither attrList nor attrDict is validated against a list of
        # allowed attributes before being passed to the database.
        cutDate = str(cutDate)
        if not attrDict:
            attrDict = {}

        return gJobDB.getCounters('Jobs', attrList, attrDict,
                                  newer=cutDate, timeStamp='LastUpdateTime')

    ##############################################################################
    types_getCurrentJobCounters = []

    @staticmethod
    def export_getCurrentJobCounters(attrDict=None):
        """ Get job counters per Status with attrDict selection. Final statuses
            are given for the last day.
        """
        if not attrDict:
            attrDict = {}
        result = gJobDB.getCounters('Jobs', ['Status'], attrDict,
                                    timeStamp='LastUpdateTime')
        if not result['OK']:
            return result
        last_update = Time.dateTime() - Time.day
        resultDay = gJobDB.getCounters('Jobs', ['Status'], attrDict,
                                       newer=last_update,
                                       timeStamp='LastUpdateTime')
        if not resultDay['OK']:
            return resultDay

        # Index the last-day counters once instead of rescanning them for
        # every final status; setdefault keeps the first entry per status.
        lastDayCounts = {}
        for statusDayDict, dayCount in resultDay['Value']:
            lastDayCounts.setdefault(statusDayDict['Status'], dayCount)

        resultDict = {}
        for statusDict, count in result['Value']:
            status = statusDict['Status']
            if status in FINAL_STATES:
                # Final states only report what changed during the last day.
                resultDict[status] = lastDayCounts.get(status, 0)
            else:
                resultDict[status] = count

        return S_OK(resultDict)

    ##############################################################################
    types_getJobStatus = [IntType]

    @staticmethod
    def export_getJobStatus(jobID):
        """ Return the 'Status' attribute of the given job
        """
        return gJobDB.getJobAttribute(jobID, 'Status')

    ##############################################################################
    types_getJobOwner = [IntType]

    @staticmethod
    def export_getJobOwner(jobID):
        """ Return the 'Owner' attribute of the given job
        """
        return gJobDB.getJobAttribute(jobID, 'Owner')

    ##############################################################################
    types_getJobSite = [IntType]

    @staticmethod
    def export_getJobSite(jobID):
        """ Return the 'Site' attribute of the given job
        """
        return gJobDB.getJobAttribute(jobID, 'Site')

    ##############################################################################
    types_getJobJDL = [IntType, BooleanType]

    @staticmethod
    def export_getJobJDL(jobID, original):
        """ Return the JDL of the given job; 'original' is forwarded to
            gJobDB.getJobJDL
        """
        return gJobDB.getJobJDL(jobID, original=original)

    ##############################################################################
    types_getJobLoggingInfo = [IntType]

    @staticmethod
    def export_getJobLoggingInfo(jobID):
        """ Return the logging information recorded for the given job
        """
        return gJobLoggingDB.getJobLoggingInfo(jobID)

    ##############################################################################
    types_getJobsParameters = [ListType, ListType]

    @staticmethod
    def export_getJobsParameters(jobIDs, parameters):
        """ Return the requested attributes for a list of jobs; an empty
            jobIDs or parameters list yields an empty dictionary
        """
        if not (jobIDs and parameters):
            return S_OK({})
        return gJobDB.getAttributesForJobList(jobIDs, parameters)

    ##############################################################################
    types_getJobsStatus = [ListType]

    @staticmethod
    def export_getJobsStatus(jobIDs):
        """ Return the 'Status' attribute for a list of jobs; an empty list
            yields an empty dictionary
        """
        if not jobIDs:
            return S_OK({})
        return gJobDB.getAttributesForJobList(jobIDs, ['Status'])

    ##############################################################################
    types_getJobsMinorStatus = [ListType]

    @staticmethod
    def export_getJobsMinorStatus(jobIDs):
        """ Return the 'MinorStatus' attribute for a list of jobs
        """
        return gJobDB.getAttributesForJobList(jobIDs, ['MinorStatus'])

    ##############################################################################
    types_getJobsApplicationStatus = [ListType]

    @staticmethod
    def export_getJobsApplicationStatus(jobIDs):
        """ Return the 'ApplicationStatus' attribute for a list of jobs
        """
        return gJobDB.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

    ##############################################################################
    types_getJobsSites = [ListType]

    @staticmethod
    def export_getJobsSites(jobIDs):
        """ Return the 'Site' attribute for a list of jobs
        """
        return gJobDB.getAttributesForJobList(jobIDs, ['Site'])

    ##############################################################################
    types_getJobSummary = [IntType]

    @staticmethod
    def export_getJobSummary(jobID):
        """ Return the SUMMARY set of attributes of the given job
        """
        return gJobDB.getJobAttributes(jobID, SUMMARY)

    ##############################################################################
    types_getJobPrimarySummary = [IntType]

    @staticmethod
    def export_getJobPrimarySummary(jobID):
        """ Return the PRIMARY_SUMMARY set of attributes of the given job
        """
        return gJobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

    ##############################################################################
    types_getJobsSummary = [ListType]

    @staticmethod
    def export_getJobsSummary(jobIDs):
        """ Return the SUMMARY attributes of a list of jobs, converted to a
            string with str(); an empty list is an error
        """
        if not jobIDs:
            return S_ERROR(
                'JobMonitoring.getJobsSummary: Received empty job list')

        result = gJobDB.getAttributesForJobList(jobIDs, SUMMARY)
        if not result['OK']:
            # A failed lookup carries no 'Value'; report the error as is.
            return result
        restring = str(result['Value'])
        return S_OK(restring)

    ##############################################################################
    types_getJobPageSummaryWeb = [DictType, ListType, IntType, IntType]

    def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem,
                                    maxItems, selectJobs=True):
        """ Get the summary of the job information for a given page in the
            job monitor in a generic format

            selectDict is modified in place: the 'FromDate' / 'LastUpdate' /
            'ToDate' entries are removed when set, and an
            ('Owner', 'OwnerGroup') condition is added unless the caller
            controls 'ALL' users.  The returned dictionary holds
            'TotalRecords', 'Extras' (counters per Status) and, when
            selectJobs is true and jobs are found, 'ParameterNames' and
            'Records'.
        """
        resultDict = {}
        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        result = self.jobPolicy.getControlledUsers(RIGHT_GET_INFO)
        if not result['OK']:
            return S_ERROR('Failed to evaluate user rights')
        if result['Value'] != 'ALL':
            selectDict[('Owner', 'OwnerGroup')] = result['Value']

        # Sorting instructions. Only one for the moment.
        if sortList:
            orderAttribute = sortList[0][0] + ":" + sortList[0][1]
        else:
            orderAttribute = None

        statusDict = {}
        result = gJobDB.getCounters('Jobs', ['Status'], selectDict,
                                    newer=startDate, older=endDate,
                                    timeStamp='LastUpdateTime')
        nJobs = 0
        if result['OK']:
            for stDict, count in result['Value']:
                nJobs += count
                statusDict[stDict['Status']] = count

        resultDict['TotalRecords'] = nJobs
        if nJobs == 0:
            return S_OK(resultDict)

        resultDict['Extras'] = statusDict

        if selectJobs:
            iniJob = startItem
            if iniJob >= nJobs:
                return S_ERROR('Item number out of range')

            result = gJobDB.selectJobs(selectDict,
                                       orderAttribute=orderAttribute,
                                       newer=startDate, older=endDate,
                                       limit=(maxItems, iniJob))
            if not result['OK']:
                return S_ERROR('Failed to select jobs: ' + result['Message'])

            summaryJobList = result['Value']
            if not self.globalJobsInfo:
                validJobs, _invalidJobs, _nonauthJobs, _ownJobs = \
                    self.jobPolicy.evaluateJobRights(summaryJobList,
                                                     RIGHT_GET_INFO)
                summaryJobList = validJobs

            result = gJobDB.getAttributesForJobList(summaryJobList, SUMMARY)
            if not result['OK']:
                return S_ERROR(
                    'Failed to get job summary: ' + result['Message'])

            summaryDict = result['Value']

            # Evaluate last sign of life time
            for jobDict in summaryDict.values():
                if jobDict['HeartBeatTime'] == 'None':
                    jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
                else:
                    lastTime = Time.fromString(jobDict['LastUpdateTime'])
                    hbTime = Time.fromString(jobDict['HeartBeatTime'])
                    # lastTime - lastTime is a zero-length interval, so the
                    # first test asks whether the heartbeat is later than
                    # the last update.
                    if (hbTime - lastTime) > (lastTime - lastTime) \
                            or jobDict['Status'] == "Stalled":
                        jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
                    else:
                        jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']

            tqDict = {}
            result = gTaskQueueDB.getTaskQueueForJobs(summaryJobList)
            if result['OK']:
                tqDict = result['Value']

            # If no jobs can be selected after the properties check
            if not summaryDict:
                return S_OK(resultDict)

            # prepare the standard structure now
            # list(...) rather than keys()[0] / keys() + [...]: works whether
            # keys() returns a list or a view.
            firstJobDict = next(iter(summaryDict.values()))
            paramNames = list(firstJobDict)

            records = []
            for jobID, jobDict in summaryDict.items():
                jParList = [jobDict[pname] for pname in paramNames]
                jParList.append(tqDict.get(jobID, 0))
                records.append(jParList)

            resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
            resultDict['Records'] = records

        return S_OK(resultDict)

    ##############################################################################
    types_getJobStats = [StringTypes, DictType]

    @staticmethod
    def export_getJobStats(attribute, selectDict):
        """ Get job statistics distribution per attribute value with a given
            selection

            selectDict is modified in place: the 'FromDate' / 'LastUpdate' /
            'ToDate' entries are removed when set.  A failed counter query
            yields an empty dictionary rather than an error.
        """
        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        result = gJobDB.getCounters('Jobs', [attribute], selectDict,
                                    newer=startDate, older=endDate,
                                    timeStamp='LastUpdateTime')
        resultDict = {}
        if result['OK']:
            for cDict, count in result['Value']:
                resultDict[cDict[attribute]] = count

        return S_OK(resultDict)

    ##############################################################################
    types_getJobsPrimarySummary = [ListType]

    @staticmethod
    def export_getJobsPrimarySummary(jobIDs):
        """ Return the PRIMARY_SUMMARY attributes for a list of jobs
        """
        return gJobDB.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

    ##############################################################################
    types_getJobParameter = [[StringType, IntType, LongType], StringTypes]

    @staticmethod
    def export_getJobParameter(jobID, parName):
        """ Return the single job parameter parName of the given job
        """
        return gJobDB.getJobParameters(jobID, [parName])

    ##############################################################################
    types_getJobParameters = [[IntType, LongType]]

    @staticmethod
    def export_getJobParameters(jobID):
        """ Return the job parameters of the given job
        """
        return gJobDB.getJobParameters(jobID)

    ##############################################################################
    types_traceJobParameter = [
        StringTypes,
        [IntType, StringType, LongType, ListType],
        StringTypes,
        [StringType, NoneType],
        [StringType, NoneType]
    ]

    @staticmethod
    def export_traceJobParameter(site, localID, parameter, date, until):
        """ Forward the arguments unchanged to gJobDB.traceJobParameter
        """
        return gJobDB.traceJobParameter(site, localID, parameter, date, until)

    ##############################################################################
    types_traceJobParameters = [
        StringTypes,
        [IntType, StringType, LongType, ListType],
        [ListType, NoneType],
        [ListType, NoneType],
        [StringType, NoneType],
        [StringType, NoneType]
    ]

    @staticmethod
    def export_traceJobParameters(site, localID, parameterList,
                                  attributeList, date, until):
        """ Forward the arguments unchanged to gJobDB.traceJobParameters
        """
        return gJobDB.traceJobParameters(site, localID, parameterList,
                                         attributeList, date, until)

    ##############################################################################
    types_getAtticJobParameters = [[IntType, LongType]]

    @staticmethod
    def export_getAtticJobParameters(jobID, parameters=None,
                                     rescheduleCycle=-1):
        """ Return the attic job parameters of the given job; a missing or
            empty parameters argument is passed on as an empty list
        """
        if not parameters:
            parameters = []
        return gJobDB.getAtticJobParameters(jobID, parameters,
                                            rescheduleCycle)

    ##############################################################################
    types_getJobAttributes = [IntType]

    @staticmethod
    def export_getJobAttributes(jobID):
        """ Return the attributes of the given job
        """
        return gJobDB.getJobAttributes(jobID)

    ##############################################################################
    types_getJobAttribute = [IntType, StringTypes]

    @staticmethod
    def export_getJobAttribute(jobID, attribute):
        """ Return one named attribute of the given job
        """
        return gJobDB.getJobAttribute(jobID, attribute)

    ##############################################################################
    types_getSiteSummary = []

    @staticmethod
    def export_getSiteSummary():
        """ Return the result of gJobDB.getSiteSummary()
        """
        return gJobDB.getSiteSummary()

    ##############################################################################
    types_getJobHeartBeatData = [IntType]

    @staticmethod
    def export_getJobHeartBeatData(jobID):
        """ Return the heart beat data stored for the given job
        """
        return gJobDB.getHeartBeatData(jobID)

    ##############################################################################
    types_getInputData = [[IntType, LongType]]

    @staticmethod
    def export_getInputData(jobID):
        """ Get input data for the specified jobs
        """
        return gJobDB.getInputData(jobID)

    ##############################################################################
    types_getOwnerGroup = []

    @staticmethod
    def export_getOwnerGroup():
        """ Return Distinct Values of OwnerGroup from the JobsDB
        """
        return gJobDB.getDistinctJobAttributes('OwnerGroup')
class JobMonitoringHandler(RequestHandler):
    """Service handler exposing job monitoring queries.

    Each ``export_<name>`` method forwards to one of the module-level
    database objects (``jobDB``, ``jobLoggingDB``, ``taskQueueDB``) and
    returns a result dictionary carrying an ``'OK'`` flag plus either
    ``'Value'`` or ``'Message'``.  The ``types_<name>`` class attributes
    list types for the leading arguments of the matching method
    (presumably consumed by ``RequestHandler`` for argument checking --
    confirm against the base class).
    """

    def initialize(self):
        """Store the caller's credentials and build the job access policy."""
        # jobDB is only read here, so no 'global' declaration is needed.
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup,
                                   self.userProperties)
        self.jobPolicy.setJobDB(jobDB)
        return S_OK()

    ##############################################################################
    types_getApplicationStates = []

    def export_getApplicationStates(self):
        """ Return Distinct Values of ApplicationStatus job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('ApplicationStatus')

    ##############################################################################
    types_getJobTypes = []

    def export_getJobTypes(self):
        """ Return Distinct Values of JobType job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('JobType')

    ##############################################################################
    types_getOwners = []

    def export_getOwners(self):
        """ Return Distinct Values of Owner job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('Owner')

    ##############################################################################
    types_getProductionIds = []

    def export_getProductionIds(self):
        """ Return Distinct Values of ProductionId job Attribute in WMS
            (queried as the 'JobGroup' attribute)
        """
        return jobDB.getDistinctJobAttributes('JobGroup')

    ##############################################################################
    types_getJobGroups = []

    def export_getJobGroups(self, condDict=None, cutDate=None):
        """ Return Distinct Values of JobGroup job Attribute in WMS,
            restricted by condDict and, through the 'newer' argument, cutDate
        """
        return jobDB.getDistinctJobAttributes('JobGroup', condDict,
                                              newer=cutDate)

    ##############################################################################
    types_getSites = []

    def export_getSites(self):
        """ Return Distinct Values of Site job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('Site')

    ##############################################################################
    types_getStates = []

    def export_getStates(self):
        """ Return Distinct Values of Status job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('Status')

    ##############################################################################
    types_getMinorStates = []

    def export_getMinorStates(self):
        """ Return Distinct Values of Minor Status job Attribute in WMS
        """
        return jobDB.getDistinctJobAttributes('MinorStatus')

    ##############################################################################
    types_getJobs = []

    def export_getJobs(self, attrDict=None, cutDate=None):
        """ Return list of JobIds matching the condition given in attrDict
        """
        # NOTE: attrDict is handed to the database as received; no attribute
        # whitelist is applied here.
        return jobDB.selectJobs(attrDict, newer=cutDate)

    ##############################################################################
    types_getCounters = [ListType]

    def export_getCounters(self, attrList, attrDict=None, cutDate=''):
        """
        Retrieve list of distinct attributes values from attrList
        with attrDict as condition.
        For each set of distinct values, count number of occurrences.
        Return a list. Each item is a list with 2 items, the list of distinct
        attribute values and the counter
        """
        # NOTE: neither attrList nor attrDict is validated against a list of
        # allowed attributes before being passed to the database.
        # NOTE(review): the original computed str(cutDate) into an unused,
        # misspelled local ('cutdate'); cutDate is still forwarded unchanged
        # here to keep existing behaviour -- confirm whether the conversion
        # was meant to take effect.
        if attrDict is None:
            attrDict = {}
        return jobDB.getCounters('Jobs', attrList, attrDict,
                                 newer=cutDate, timeStamp='LastUpdateTime')

    ##############################################################################
    types_getCurrentJobCounters = []

    def export_getCurrentJobCounters(self, attrDict=None):
        """ Get job counters per Status with attrDict selection. Final statuses
            are given for the last day.
        """
        if attrDict is None:
            attrDict = {}
        result = jobDB.getCounters('Jobs', ['Status'], attrDict,
                                   timeStamp='LastUpdateTime')
        if not result['OK']:
            return result
        last_update = Time.dateTime() - Time.day
        resultDay = jobDB.getCounters('Jobs', ['Status'], attrDict,
                                      newer=last_update,
                                      timeStamp='LastUpdateTime')
        if not resultDay['OK']:
            return resultDay

        # Index the last-day counters once instead of rescanning them for
        # every final status; setdefault keeps the first entry per status.
        lastDayCounts = {}
        for statusDayDict, dayCount in resultDay['Value']:
            lastDayCounts.setdefault(statusDayDict['Status'], dayCount)

        resultDict = {}
        for statusDict, count in result['Value']:
            status = statusDict['Status']
            if status in FINAL_STATES:
                # Final states only report what changed during the last day.
                resultDict[status] = lastDayCounts.get(status, 0)
            else:
                resultDict[status] = count

        return S_OK(resultDict)

    ##############################################################################
    types_getJobStatus = [IntType]

    def export_getJobStatus(self, jobID):
        """ Return the 'Status' attribute of the given job
        """
        return jobDB.getJobAttribute(jobID, 'Status')

    ##############################################################################
    types_getJobOwner = [IntType]

    def export_getJobOwner(self, jobID):
        """ Return the 'Owner' attribute of the given job
        """
        return jobDB.getJobAttribute(jobID, 'Owner')

    ##############################################################################
    types_getJobSite = [IntType]

    def export_getJobSite(self, jobID):
        """ Return the 'Site' attribute of the given job
        """
        return jobDB.getJobAttribute(jobID, 'Site')

    ##############################################################################
    types_getJobJDL = [IntType]

    def export_getJobJDL(self, jobID):
        """ Return the JDL of the given job
        """
        return jobDB.getJobJDL(jobID)

    ##############################################################################
    types_getJobLoggingInfo = [IntType]

    def export_getJobLoggingInfo(self, jobID):
        """ Return the logging information recorded for the given job
        """
        return jobLoggingDB.getJobLoggingInfo(jobID)

    ##############################################################################
    types_getJobsStatus = [ListType]

    def export_getJobsStatus(self, jobIDs):
        """ Return the 'Status' attribute for a list of jobs; an empty list
            yields an empty dictionary
        """
        if not jobIDs:
            return S_OK({})
        return jobDB.getAttributesForJobList(jobIDs, ['Status'])

    ##############################################################################
    types_getJobsMinorStatus = [ListType]

    def export_getJobsMinorStatus(self, jobIDs):
        """ Return the 'MinorStatus' attribute for a list of jobs
        """
        return jobDB.getAttributesForJobList(jobIDs, ['MinorStatus'])

    ##############################################################################
    types_getJobsApplicationStatus = [ListType]

    def export_getJobsApplicationStatus(self, jobIDs):
        """ Return the 'ApplicationStatus' attribute for a list of jobs
        """
        return jobDB.getAttributesForJobList(jobIDs, ['ApplicationStatus'])

    ##############################################################################
    types_getJobsSites = [ListType]

    def export_getJobsSites(self, jobIDs):
        """ Return the 'Site' attribute for a list of jobs
        """
        return jobDB.getAttributesForJobList(jobIDs, ['Site'])

    ##############################################################################
    types_getJobSummary = [IntType]

    def export_getJobSummary(self, jobID):
        """ Return the SUMMARY set of attributes of the given job
        """
        return jobDB.getJobAttributes(jobID, SUMMARY)

    ##############################################################################
    types_getJobPrimarySummary = [IntType]

    def export_getJobPrimarySummary(self, jobID):
        """ Return the PRIMARY_SUMMARY set of attributes of the given job
        """
        return jobDB.getJobAttributes(jobID, PRIMARY_SUMMARY)

    ##############################################################################
    types_getJobsSummary = [ListType]

    def export_getJobsSummary(self, jobIDs):
        """ Return the SUMMARY attributes of a list of jobs, converted to a
            string with str(); an empty list is an error
        """
        if not jobIDs:
            return S_ERROR(
                'JobMonitoring.getJobsSummary: Received empty job list')

        result = jobDB.getAttributesForJobList(jobIDs, SUMMARY)
        if not result['OK']:
            # A failed lookup carries no 'Value'; report the error as is.
            return result
        restring = str(result['Value'])
        return S_OK(restring)

    ##############################################################################
    types_getJobPageSummaryWeb = [DictType, ListType, IntType, IntType]

    def export_getJobPageSummaryWeb(self, selectDict, sortList, startItem,
                                    maxItems, selectJobs=True):
        """ Get the summary of the job information for a given page in the
            job monitor in a generic format

            selectDict is modified in place: the 'FromDate' / 'LastUpdate' /
            'ToDate' entries are removed when set.  The returned dictionary
            holds 'Extras' (counters per Status) and, when selectJobs is
            true, 'TotalRecords' plus 'ParameterNames' and 'Records' for the
            requested page.
        """
        resultDict = {}
        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        # Sorting instructions. Only one for the moment.
        if sortList:
            orderAttribute = sortList[0][0] + ":" + sortList[0][1]
        else:
            orderAttribute = None

        if selectJobs:
            result = jobDB.selectJobs(selectDict,
                                      orderAttribute=orderAttribute,
                                      newer=startDate, older=endDate)
            if not result['OK']:
                return S_ERROR('Failed to select jobs: ' + result['Message'])

            # A.T. This needs optimization: job rights are not evaluated
            # here, every selected job is reported.
            jobList = result['Value']

            nJobs = len(jobList)
            resultDict['TotalRecords'] = nJobs
            if nJobs == 0:
                return S_OK(resultDict)

            iniJob = startItem
            lastJob = iniJob + maxItems
            if iniJob >= nJobs:
                return S_ERROR('Item number out of range')

            if lastJob > nJobs:
                lastJob = nJobs

            summaryJobList = jobList[iniJob:lastJob]
            result = jobDB.getAttributesForJobList(summaryJobList, SUMMARY)
            if not result['OK']:
                return S_ERROR(
                    'Failed to get job summary: ' + result['Message'])

            summaryDict = result['Value']

            # Evaluate last sign of life time
            for jobDict in summaryDict.values():
                if jobDict['HeartBeatTime'] == 'None':
                    jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']
                else:
                    lastTime = Time.fromString(jobDict['LastUpdateTime'])
                    hbTime = Time.fromString(jobDict['HeartBeatTime'])
                    # lastTime - lastTime is a zero-length interval, so the
                    # first test asks whether the heartbeat is later than
                    # the last update.
                    if (hbTime - lastTime) > (lastTime - lastTime) \
                            or jobDict['Status'] == "Stalled":
                        jobDict['LastSignOfLife'] = jobDict['HeartBeatTime']
                    else:
                        jobDict['LastSignOfLife'] = jobDict['LastUpdateTime']

            tqDict = {}
            result = taskQueueDB.getTaskQueueForJobs(summaryJobList)
            if result['OK']:
                tqDict = result['Value']

            # prepare the standard structure now
            # Guarded: an empty summary used to raise IndexError on
            # keys()[0]; in that case no records are reported.
            if summaryDict:
                firstJobDict = next(iter(summaryDict.values()))
                paramNames = list(firstJobDict)

                records = []
                for jobID, jobDict in summaryDict.items():
                    jParList = [jobDict[pname] for pname in paramNames]
                    # 0 stands for "no task queue known for this job".
                    jParList.append(tqDict.get(jobID, 0))
                    records.append(jParList)

                resultDict['ParameterNames'] = paramNames + ['TaskQueueID']
                resultDict['Records'] = records

        statusDict = {}
        result = jobDB.getCounters('Jobs', ['Status'], selectDict,
                                   newer=startDate, older=endDate,
                                   timeStamp='LastUpdateTime')
        if result['OK']:
            for stDict, count in result['Value']:
                statusDict[stDict['Status']] = count
        resultDict['Extras'] = statusDict

        return S_OK(resultDict)

    ##############################################################################
    types_getJobStats = [StringTypes, DictType]

    def export_getJobStats(self, attribute, selectDict):
        """ Get job statistics distribution per attribute value with a given
            selection

            selectDict is modified in place: the 'FromDate' / 'LastUpdate' /
            'ToDate' entries are removed when set.  A failed counter query
            yields an empty dictionary rather than an error.
        """
        startDate = selectDict.get('FromDate', None)
        if startDate:
            del selectDict['FromDate']
        # For backward compatibility
        if startDate is None:
            startDate = selectDict.get('LastUpdate', None)
            if startDate:
                del selectDict['LastUpdate']
        endDate = selectDict.get('ToDate', None)
        if endDate:
            del selectDict['ToDate']

        result = jobDB.getCounters('Jobs', [attribute], selectDict,
                                   newer=startDate, older=endDate,
                                   timeStamp='LastUpdateTime')
        resultDict = {}
        if result['OK']:
            for cDict, count in result['Value']:
                resultDict[cDict[attribute]] = count

        return S_OK(resultDict)

    ##############################################################################
    types_getJobsPrimarySummary = [ListType]

    def export_getJobsPrimarySummary(self, jobIDs):
        """ Return the PRIMARY_SUMMARY attributes for a list of jobs
        """
        return jobDB.getAttributesForJobList(jobIDs, PRIMARY_SUMMARY)

    ##############################################################################
    types_getJobParameter = [[IntType, LongType], StringType]

    def export_getJobParameter(self, jobID, parName):
        """ Return the single job parameter parName of the given job
        """
        return jobDB.getJobParameters(jobID, [parName])

    ##############################################################################
    types_getJobParameters = [[IntType, LongType]]

    def export_getJobParameters(self, jobID):
        """ Return the job parameters of the given job
        """
        return jobDB.getJobParameters(jobID)

    ##############################################################################
    types_getAtticJobParameters = [[IntType, LongType]]

    def export_getAtticJobParameters(self, jobID, parameters=None,
                                     rescheduleCycle=-1):
        """ Return the attic job parameters of the given job; a missing
            parameters argument is passed on as an empty list
        """
        if parameters is None:
            parameters = []
        return jobDB.getAtticJobParameters(jobID, parameters,
                                           rescheduleCycle)

    ##############################################################################
    types_getJobAttributes = [IntType]

    def export_getJobAttributes(self, jobID):
        """ Return the attributes of the given job
        """
        return jobDB.getJobAttributes(jobID)

    ##############################################################################
    types_getSiteSummary = []

    def export_getSiteSummary(self):
        """ Return the result of jobDB.getSiteSummary()
        """
        return jobDB.getSiteSummary()

    ##############################################################################
    types_getJobHeartBeatData = [IntType]

    def export_getJobHeartBeatData(self, jobID):
        """ Return the heart beat data stored for the given job
        """
        return jobDB.getHeartBeatData(jobID)

    ##############################################################################
    types_getInputData = [[IntType, LongType]]

    def export_getInputData(self, jobID):
        """ Get input data for the specified jobs
        """
        return jobDB.getInputData(jobID)
class JobManagerHandler(RequestHandler):
    """ RequestHandler implementing job submission and job manipulation.

    The exported methods delegate the actual work to the module-level
    gJobDB, gJobLoggingDB, gtaskQueueDB and gProxyManager objects and tell the
    OptimizationMind service which jobs need to be (re)optimized.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """ Create the shared OptimizationMind message client.

        A first connection is attempted immediately and the connection check is
        registered as a periodic task (period 60, presumably seconds).

        :param dict serviceInfoDict: service description (not used here)
        :return: S_OK()
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """ Connect the message client unless it is already connected; only warn on failure. """
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result["OK"]:
                cls.log.warn("Cannot connect to OptimizationMind!", result["Message"])

    def initialize(self):
        """ Store the remote credentials, the client setup and the job policy of the request.

        :return: S_OK()
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict["DN"]
        self.ownerGroup = credDict["group"]
        self.userProperties = credDict["properties"]
        self.owner = credDict["username"]
        self.peerUsesLimitedProxy = credDict["isLimitedProxy"]
        self.diracSetup = self.serviceInfoDict["clientSetup"]
        self.maxParametricJobs = self.srv_getCSOption("MaxParametricJobs", MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """ Send an OptimizeJobs message carrying the sorted job IDs.

        Best effort: nothing is sent when the client is not connected, and
        failures are only logged.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result["OK"]:
            self.log.error("Cannot create Optimize message: %s" % result["Message"])
            return
        msgObj = result["Value"]
        msgObj.jids = sorted(jids)
        result = self.msgClient.sendMessage(msgObj)
        if not result["OK"]:
            self.log.error("Cannot send Optimize message: %s" % result["Message"])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [StringTypes]

    def export_submitJob(self, jobDesc):
        """ Submit a single or a parametric job to DIRAC WMS.

        :param str jobDesc: job description (JDL); enclosing brackets are added when missing
        :return: S_OK(jobID) for a single job or S_OK(list of job IDs) for a
                 parametric job, with the additional keys 'JobID' and
                 'requireProxyUpload'; S_ERROR otherwise
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result["OK"]:
            return S_ERROR("Failed to get job policies")
        policyDict = result["Value"]
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR("Job submission not authorized")

        # jobDesc is JDL for now.
        # startswith/endswith (unlike indexing) are safe on an empty description.
        jobDesc = jobDesc.strip()
        if not jobDesc.startswith("["):
            jobDesc = "[%s" % jobDesc
        if not jobDesc.endswith("]"):
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        nParameters = getNumberOfParameters(jobClassAd)
        parametricJob = False
        if nParameters > 0:
            parametricJob = True
            result = generateParametricJobs(jobClassAd)
            if not result["OK"]:
                return result
            jobDescList = result["Value"]
            # Enforce the configured limit before anything is written to the JobDB
            # (assumes generateParametricJobs returns a sized list of descriptions).
            if len(jobDescList) > self.maxParametricJobs:
                return S_ERROR(
                    "The number of parametric jobs exceeded the limit of %d" % self.maxParametricJobs
                )
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(
                jobDescription, self.owner, self.ownerDN, self.ownerGroup, self.diracSetup
            )
            if not result["OK"]:
                return result

            jobID = result["JobID"]
            gLogger.info("Job %s added to the JobDB for %s/%s" % (jobID, self.ownerDN, self.ownerGroup))
            gJobLoggingDB.addLoggingRecord(jobID, result["Status"], result["MinorStatus"], source="JobManager")
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if "Value" not in retVal or not retVal["Value"]:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result["JobID"] = result["Value"]
        result["requireProxyUpload"] = self.__checkIfProxyUploadIsRequired()
        self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """ Tell whether the user has to upload a proxy.

        :return: True when no suitable proxy is reported (validSeconds=18000) or
                 when the check itself failed, False otherwise
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result["OK"]:
            gLogger.error("Can't check if the user has proxy uploaded:", result["Message"])
            return True
        # Check if an upload is required
        return not result["Value"]

    ###########################################################################
    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """ Make job with jobID invalid, e.g. because of the sandbox submission errors.

        Currently a no-op.
        NOTE(review): the method lacks the export_ prefix used by the other
        remotely callable methods - confirm whether it is meant to be exposed.
        """
        pass

    ###########################################################################
    def __get_job_list(self, jobInput):
        """ Evaluate the jobInput into a list of ints.

        :param jobInput: a job ID as int or string, or a list of such IDs
        :return: list of int job IDs; empty if the input cannot be interpreted
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, list):
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """ Reschedule the jobs specified by jobIDs.

        Processing stops at the first job that cannot be rescheduled. Jobs that
        were rescheduled are always announced to OptimizationMind, including
        when the call ends with an error.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with the key
                 'requireProxyUpload'; S_ERROR carrying 'InvalidJobIDs' and/or
                 'NonauthorizedJobIDs' when some jobs were refused
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
            jobList, RIGHT_RESCHEDULE
        )

        rescheduledJobs = []
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result["OK"]:
                # The jobs rescheduled so far still have to reach the optimizers
                if rescheduledJobs:
                    self.__sendJobsToOptimizationMind(rescheduledJobs)
                return result
            gJobLoggingDB.addLoggingRecord(
                result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager"
            )
            rescheduledJobs.append(jobID)

        # Notify before the error check below, otherwise a single refused ID in
        # the request would leave the rescheduled jobs unannounced.
        if rescheduledJobs:
            self.__sendJobsToOptimizationMind(rescheduledJobs)

        if invalidJobList or nonauthJobList:
            result = S_ERROR("Some jobs failed reschedule")
            if invalidJobList:
                result["InvalidJobIDs"] = invalidJobList
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    def __deleteJob(self, jobID):
        """ Delete one job: mark it 'Deleted' and drop it from the TaskQueue.

        :return: the failed result of the status change, S_OK() otherwise
                 (a TaskQueue failure is only logged)
        """
        result = gJobDB.setJobStatus(jobID, "Deleted", "Checking accounting")
        if not result["OK"]:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result["OK"]:
            gLogger.warn("Failed to delete job from the TaskQueue")

        return S_OK()

    def __killJob(self, jobID, sendKillCommand=True):
        """ Kill one job.

        :param int jobID: job identifier
        :param bool sendKillCommand: when False only the status is changed and
                                     no 'Kill' command is stored for the job
        :return: the failed result of the command, S_OK() otherwise (status and
                 TaskQueue failures are only logged)
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, "Kill")
            if not result["OK"]:
                return result

        gLogger.info("Job %d is marked for termination" % jobID)
        result = gJobDB.setJobStatus(jobID, "Killed", "Marked for termination")
        if not result["OK"]:
            gLogger.warn("Failed to set job Killed status")
        result = gtaskQueueDB.deleteJob(jobID)
        if not result["OK"]:
            gLogger.warn("Failed to delete job from the TaskQueue")

        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """ Kill or delete jobs as necessary.

        Depending on its current status a job is killed, only marked as killed
        or (unless right is RIGHT_KILL) deleted; jobs in 'Staging' status are
        additionally reported to the stager.

        :param jobIDList: a job ID (int or string) or a list of job IDs
        :param right: RIGHT_KILL or RIGHT_DELETE
        :return: S_OK(list of valid job IDs) with 'requireProxyUpload' and
                 optionally 'InvalidJobIDs'; S_ERROR carrying
                 'NonauthorizedJobIDs' and/or 'FailedJobIDs' otherwise
        """
        jobList = self.__get_job_list(jobIDList)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ["Status"])
        if not result["OK"]:
            return result

        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result["Value"].items():
            status = sDict["Status"]
            if status in ["Running", "Matched", "Stalled"]:
                killJobList.append(jobID)
            elif status in ["Done", "Failed"]:
                # Finished jobs are left untouched by a kill request
                if right != RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                markKilledJobList.append(jobID)
            if status in ["Staging"]:
                stagingJobList.append(jobID)

        bad_ids = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result["OK"]:
                bad_ids.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result["OK"]:
                bad_ids.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result["OK"]:
                bad_ids.append(jobID)

        if stagingJobList:
            stagerClient = StorageManagerClient()
            gLogger.info("Going to send killing signal to stager as well!")
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result["OK"]:
                gLogger.warn("Failed to kill some Stager tasks: %s" % result["Message"])

        if nonauthJobList or bad_ids:
            result = S_ERROR("Some jobs failed deletion")
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            if bad_ids:
                result["FailedJobIDs"] = bad_ids
            return result

        result = S_OK(validJobList)
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()

        if invalidJobList:
            result["InvalidJobIDs"] = invalidJobList

        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """ Delete jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK/S_ERROR as produced by the kill/delete helper
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """ Kill jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK/S_ERROR as produced by the kill/delete helper
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """ Reset jobs specified in the jobIDs list.

        For every authorized job the RescheduleCounter attribute is set to -1
        and the job is rescheduled; successfully reset jobs are sent to
        OptimizationMind.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK() with the key 'requireProxyUpload'; S_ERROR carrying
                 'InvalidJobIDs', 'NonauthorizedJobIDs' and/or 'FailedJobIDs'
                 otherwise
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR("Invalid job specification: " + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(
            jobList, RIGHT_RESET
        )

        bad_ids = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, "RescheduleCounter", -1)
            if not result["OK"]:
                bad_ids.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result["OK"]:
                bad_ids.append(jobID)
                continue
            good_ids.append(jobID)
            # Written for successful reschedules only: the record is built from
            # keys of the reschedule result.
            gJobLoggingDB.addLoggingRecord(
                result["JobID"], result["Status"], result["MinorStatus"], application="Unknown", source="JobManager"
            )

        self.__sendJobsToOptimizationMind(good_ids)
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR("Some jobs failed resetting")
            if invalidJobList:
                result["InvalidJobIDs"] = invalidJobList
            if nonauthJobList:
                result["NonauthorizedJobIDs"] = nonauthJobList
            if bad_ids:
                result["FailedJobIDs"] = bad_ids
            return result

        result = S_OK()
        result["requireProxyUpload"] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """ RequestHandler implementing job submission and job manipulation.

    The exported methods delegate the actual work to the module-level
    gJobDB, gJobLoggingDB, gtaskQueueDB and gProxyManager objects.
    """

    def initialize(self):
        """ Store the remote credentials, the client setup, the parametric job
        limit read from the service section and the job policy of the request.
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        serviceSectionPath = self.serviceInfoDict['serviceSectionPath']
        self.maxParametricJobs = gConfig.getValue('%s/MaxParametricJobs' % serviceSectionPath,
                                                  MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)

    ###########################################################################
    def __getParameterList(self, jobClassAd):
        """ Build the list of parameter values of a parametric job description.

        'Parameters' is either an explicit list of values or a number of
        values; in the latter case the sequence starts at ParameterStart and
        each further value is previous * ParameterFactor + ParameterStep
        (factor 1 and step 0 when not given).

        :param jobClassAd: ClassAd object holding a 'Parameters' attribute
        :return: S_OK(list of parameter values) / S_ERROR
        """
        if jobClassAd.isAttributeList('Parameters'):
            return S_OK(jobClassAd.getListFromExpression('Parameters'))

        nParameters = jobClassAd.getAttributeInt('Parameters')
        if not nParameters:
            value = jobClassAd.get_expression('Parameters')
            return S_ERROR('Illegal value for Parameters JDL field: %s' % value)

        # No default for the start value is defined, so a numeric Parameters
        # field without ParameterStart is reported instead of failing later on
        # an unbound variable.
        if not jobClassAd.lookupAttribute('ParameterStart'):
            return S_ERROR('Missing ParameterStart JDL field')
        value = jobClassAd.get_expression('ParameterStart').replace('"', '')
        try:
            pStart = int(value)
        except ValueError:
            try:
                pStart = float(value)
            except ValueError:
                return S_ERROR('Illegal value for ParameterStart JDL field: %s' % value)

        pStep = 0
        if jobClassAd.lookupAttribute('ParameterStep'):
            pStep = jobClassAd.getAttributeInt('ParameterStep')
            if not pStep:
                pStep = jobClassAd.getAttributeFloat('ParameterStep')
                if not pStep:
                    value = jobClassAd.get_expression('ParameterStep')
                    return S_ERROR('Illegal value for ParameterStep JDL field: %s' % value)

        pFactor = 1
        if jobClassAd.lookupAttribute('ParameterFactor'):
            pFactor = jobClassAd.getAttributeInt('ParameterFactor')
            if not pFactor:
                pFactor = jobClassAd.getAttributeFloat('ParameterFactor')
                if not pFactor:
                    value = jobClassAd.get_expression('ParameterFactor')
                    return S_ERROR('Illegal value for ParameterFactor JDL field: %s' % value)

        # Refuse oversized requests before allocating the list: the count comes
        # straight from the user supplied JDL.
        if nParameters > self.maxParametricJobs:
            return S_ERROR('The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs)

        parameterList = [pStart]
        for i in range(nParameters - 1):
            parameterList.append(parameterList[i] * pFactor + pStep)
        return S_OK(parameterList)

    ###########################################################################
    types_submitJob = [StringType]

    def export_submitJob(self, jobDesc):
        """ Submit a single or a parametric job to DIRAC WMS.

        For a parametric job one job per parameter value is created; in its
        description '%s' is replaced by the value and '%n' by its index.

        :param str jobDesc: job description (JDL); enclosing brackets are added when missing
        :return: S_OK(jobID) for a single job or S_OK(list of job IDs) for a
                 parametric job, with the additional keys 'JobID' and
                 'requireProxyUpload'; S_ERROR otherwise
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR("Can't submit using a limited proxy! (bad boy!)")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR('Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR('Job submission not authorized')

        # jobDesc is JDL for now.
        # startswith/endswith (unlike indexing) are safe on an empty description.
        jobDesc = jobDesc.strip()
        if not jobDesc.startswith("["):
            jobDesc = "[%s" % jobDesc
        if not jobDesc.endswith("]"):
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        parametricJob = False
        if jobClassAd.lookupAttribute('Parameters'):
            parametricJob = True
            result = self.__getParameterList(jobClassAd)
            if not result['OK']:
                return result
            parameterList = result['Value']

            if len(parameterList) > self.maxParametricJobs:
                return S_ERROR('The number of parametric jobs exceeded the limit of %d' % self.maxParametricJobs)

            jobDescList = [jobDesc.replace('%s', str(p)).replace('%n', str(n))
                           for n, p in enumerate(parameterList)]
        else:
            jobDescList = [jobDesc]

        jobIDList = []
        for jobDescription in jobDescList:
            result = gJobDB.insertNewJobIntoDB(jobDescription, self.owner, self.ownerDN,
                                               self.ownerGroup, self.diracSetup)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        return result

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """ Tell whether the user has to upload a proxy.

        :return: True when no suitable proxy is reported (validSeconds=18000) or
                 when the check itself failed, False otherwise
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return not result['Value']

    ###########################################################################
    types_invalidateJob = [IntType]

    def invalidateJob(self, jobID):
        """ Make job with jobID invalid, e.g. because of the sandbox submission errors.

        Currently a no-op.
        NOTE(review): the method lacks the export_ prefix used by the other
        remotely callable methods - confirm whether it is meant to be exposed.
        """
        pass

    ###########################################################################
    def __get_job_list(self, jobInput):
        """ Evaluate the jobInput into a list of ints.

        :param jobInput: a job ID as int or string, or a list of such IDs
        :return: list of int job IDs; empty if the input cannot be interpreted
        """
        if isinstance(jobInput, IntType):
            return [jobInput]
        if isinstance(jobInput, StringType):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, ListType):
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    def __evaluate_rights(self, jobList, right):
        """ Get access rights to jobID for the user ownerDN/ownerGroup.

        :param list jobList: job IDs to check
        :param right: the right to look up in the per-job rights
        :return: tuple (validJobList, invalidJobList, nonauthJobList, ownerJobList);
                 jobs whose rights cannot be obtained are reported as invalid,
                 ownerJobList holds the jobs flagged with 'UserIsOwner'
        """
        self.jobPolicy.setJobDB(gJobDB)
        validJobList = []
        invalidJobList = []
        nonauthJobList = []
        ownerJobList = []
        for jobID in jobList:
            result = self.jobPolicy.getUserRightsForJob(jobID)
            if not result['OK']:
                invalidJobList.append(jobID)
                continue
            if result['Value'][right]:
                validJobList.append(jobID)
            else:
                nonauthJobList.append(jobID)
            if result['UserIsOwner']:
                ownerJobList.append(jobID)

        return validJobList, invalidJobList, nonauthJobList, ownerJobList

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """ Reschedule the jobs specified by jobIDs.

        Processing stops at the first job that cannot be rescheduled.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with the key
                 'requireProxyUpload'; S_ERROR carrying 'InvalidJobIDs' and/or
                 'NonauthorizedJobIDs' when some jobs were refused
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_RESCHEDULE)
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList:
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """ Delete jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of valid job IDs) with the key 'requireProxyUpload';
                 S_ERROR carrying 'InvalidJobIDs', 'NonauthorizedJobIDs' and/or
                 'FailedJobIDs' otherwise
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_DELETE)

        bad_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
            if not result['OK']:
                bad_ids.append(jobID)
            result = gtaskQueueDB.deleteJob(jobID)
            if not result['OK']:
                gLogger.warn('Failed to delete job from the TaskQueue')

        # bad_ids belongs to the condition: a failed status change must not be
        # reported as a successful deletion.
        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """ Kill jobs specified in the jobIDs list.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK(list of valid job IDs) with the key 'requireProxyUpload';
                 S_ERROR carrying 'InvalidJobIDs', 'NonauthorizedJobIDs' and/or
                 'FailedJobIDs' otherwise
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_KILL)

        bad_ids = []
        for jobID in validJobList:
            # kill jobID
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                bad_ids.append(jobID)
                continue
            gLogger.info('Job %d is marked for termination' % jobID)
            result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
            if not result['OK']:
                gLogger.warn('Failed to set job status')
            result = gtaskQueueDB.deleteJob(jobID)
            if not result['OK']:
                gLogger.warn('Failed to delete job from the TaskQueue')

        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed deletion')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """ Reset jobs specified in the jobIDs list.

        For every authorized job the RescheduleCounter attribute is set to 0
        and the job is rescheduled.

        :param jobIDs: a job ID (int or string) or a list of job IDs
        :return: S_OK() with the key 'requireProxyUpload'; S_ERROR carrying
                 'InvalidJobIDs', 'NonauthorizedJobIDs' and/or 'FailedJobIDs'
                 otherwise
        """
        jobList = self.__get_job_list(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.__evaluate_rights(jobList,
                                                                                            RIGHT_RESET)

        bad_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', 0)
            if not result['OK']:
                bad_ids.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result['OK']:
                bad_ids.append(jobID)
                continue
            # Written for successful reschedules only: the record is built from
            # keys of the reschedule result.
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        if invalidJobList or nonauthJobList or bad_ids:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if bad_ids:
                result['FailedJobIDs'] = bad_ids
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result
class JobManagerHandler(RequestHandler):
    """ RequestHandler implementation of the JobManager.

    The exported methods delegate the actual work to the module-level
    gJobDB, gJobLoggingDB, gtaskQueueDB, gPilotAgentsDB and gProxyManager
    objects and tell the OptimizationMind service which jobs need to be
    (re)optimized.
    """

    @classmethod
    def initializeHandler(cls, serviceInfoDict):
        """ Create the shared OptimizationMind message client.

        A first connection is attempted immediately and the connection check is
        registered as a periodic task (period 60, presumably seconds).

        :param dict serviceInfoDict: service description (not used here)
        :return: S_OK()
        """
        cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
        cls.__connectToOptMind()
        gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
        return S_OK()

    @classmethod
    def __connectToOptMind(cls):
        """ Connect the message client unless it is already connected; only warn on failure. """
        if not cls.msgClient.connected:
            result = cls.msgClient.connect(JobManager=True)
            if not result['OK']:
                cls.log.warn("Cannot connect to OptimizationMind!", result['Message'])

    def initialize(self):
        """ Store the remote credentials, the client setup and the job policy of the request.

        :return: S_OK()
        """
        credDict = self.getRemoteCredentials()
        self.ownerDN = credDict['DN']
        self.ownerGroup = credDict['group']
        self.userProperties = credDict['properties']
        self.owner = credDict['username']
        self.peerUsesLimitedProxy = credDict['isLimitedProxy']
        self.diracSetup = self.serviceInfoDict['clientSetup']
        self.maxParametricJobs = self.srv_getCSOption('MaxParametricJobs', MAX_PARAMETRIC_JOBS)
        self.jobPolicy = JobPolicy(self.ownerDN, self.ownerGroup, self.userProperties)
        self.jobPolicy.setJobDB(gJobDB)
        return S_OK()

    def __sendJobsToOptimizationMind(self, jids):
        """ Send an OptimizeJobs message carrying the sorted job IDs.

        Best effort: nothing is sent when the client is not connected, and
        failures are only logged.

        :param jids: iterable of job IDs
        """
        if not self.msgClient.connected:
            return
        result = self.msgClient.createMessage("OptimizeJobs")
        if not result['OK']:
            self.log.error("Cannot create Optimize message: %s" % result['Message'])
            return
        msgObj = result['Value']
        msgObj.jids = sorted(jids)
        result = self.msgClient.sendMessage(msgObj)
        if not result['OK']:
            self.log.error("Cannot send Optimize message: %s" % result['Message'])
            return
        self.log.info("Optimize msg sent for %s jobs" % len(jids))

    ###########################################################################
    types_submitJob = [basestring]

    def export_submitJob(self, jobDesc):
        """ Submit a job to DIRAC WMS.

        The job can be a single job, or a parametric job.
        If it is a parametric job, then the parameters will need to be unpacked.
        Parametric jobs are inserted in 'Submitting' status and are not sent to
        the optimizers here; single jobs are inserted as 'Received' and sent
        immediately.

        :param str jobDesc: job description JDL (of a single or parametric job)
        :return: S_OK/S_ERROR; in case of S_OK the value is the new job ID for
                 a single job or the list of new job IDs for a parametric job,
                 with the additional keys 'JobID' and 'requireProxyUpload'
        """
        if self.peerUsesLimitedProxy:
            return S_ERROR(EWMSSUBM, "Can't submit using a limited proxy")

        # Check job submission permission
        result = self.jobPolicy.getJobPolicy()
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Failed to get job policies')
        policyDict = result['Value']
        if not policyDict[RIGHT_SUBMIT]:
            return S_ERROR(EWMSSUBM, 'Job submission not authorized')

        # jobDesc is JDL for now.
        # startswith/endswith (unlike indexing) are safe on an empty description.
        jobDesc = jobDesc.strip()
        if not jobDesc.startswith("["):
            jobDesc = "[%s" % jobDesc
        if not jobDesc.endswith("]"):
            jobDesc = "%s]" % jobDesc

        # Check if the job is a parametric one
        jobClassAd = ClassAd(jobDesc)
        result = getParameterVectorLength(jobClassAd)
        if not result['OK']:
            return result
        nJobs = result['Value']
        parametricJob = False
        if nJobs > 0:
            # if we are here, then jobDesc was the description of a parametric job. So we start unpacking
            parametricJob = True
            if nJobs > self.maxParametricJobs:
                return S_ERROR(EWMSJDL, "Number of parametric jobs exceeds the limit of %d" % self.maxParametricJobs)
            result = generateParametricJobs(jobClassAd)
            if not result['OK']:
                return result
            jobDescList = result['Value']
        else:
            # if we are here, then jobDesc was the description of a single job.
            jobDescList = [jobDesc]

        jobIDList = []

        if parametricJob:
            initialStatus = 'Submitting'
            initialMinorStatus = 'Bulk transaction confirmation'
        else:
            initialStatus = 'Received'
            initialMinorStatus = 'Job accepted'

        for jobDescription in jobDescList:  # jobDescList because there might be a list generated by a parametric job
            result = gJobDB.insertNewJobIntoDB(jobDescription,
                                               self.owner,
                                               self.ownerDN,
                                               self.ownerGroup,
                                               self.diracSetup,
                                               initialStatus=initialStatus,
                                               initialMinorStatus=initialMinorStatus)
            if not result['OK']:
                return result

            jobID = result['JobID']
            gLogger.info('Job %s added to the JobDB for %s/%s' % (jobID, self.ownerDN, self.ownerGroup))
            gJobLoggingDB.addLoggingRecord(jobID, result['Status'], result['MinorStatus'], source='JobManager')
            jobIDList.append(jobID)

        # Set persistency flag
        retVal = gProxyManager.getUserPersistence(self.ownerDN, self.ownerGroup)
        if 'Value' not in retVal or not retVal['Value']:
            gProxyManager.setPersistency(self.ownerDN, self.ownerGroup, True)

        if parametricJob:
            result = S_OK(jobIDList)
        else:
            result = S_OK(jobIDList[0])

        result['JobID'] = result['Value']
        result['requireProxyUpload'] = self.__checkIfProxyUploadIsRequired()
        # Ensure non-parametric jobs (i.e. non-bulk) get sent to optimizer immediately
        if not parametricJob:
            self.__sendJobsToOptimizationMind(jobIDList)
        return result

    ###########################################################################
    types_confirmBulkSubmission = [list]

    def export_confirmBulkSubmission(self, jobIDs):
        """ Confirm the possibility to proceed with processing of the jobs specified
        by jobIDs.

        Jobs still in 'Submitting' status are moved to 'Received' and sent to
        the optimizers; if all the jobs are already active nothing is changed.

        :param list jobIDs: list of job IDs
        :return: S_OK(confirmed job IDs) / S_ERROR
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR(EWMSSUBM, 'Invalid job specification: ' + str(jobIDs))

        validJobList, _invalidJobList, _nonauthJobList, _ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                         RIGHT_SUBMIT)

        # Check that all the requested jobs are eligible
        if set(jobList) != set(validJobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        result = gJobDB.getAttributesForJobList(jobList, ['Status', 'MinorStatus'])
        if not result['OK']:
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')
        jobStatusDict = result['Value']

        # A job missing from the DB answer gets status None: it matches neither
        # selection below and the request is rejected instead of raising KeyError.
        jobStatuses = dict((jobID, jobStatusDict.get(jobID, {}).get('Status')) for jobID in jobList)

        # Check if the jobs are already activated
        jobEnabledList = [jobID for jobID in jobList
                          if jobStatuses[jobID] in ["Received", "Checking", "Waiting", "Matched", "Running"]]
        if set(jobEnabledList) == set(jobList):
            return S_OK(jobList)

        # Check that requested job are in Submitting status
        jobUpdateStatusList = [jobID for jobID in jobList if jobStatuses[jobID] == "Submitting"]
        if set(jobUpdateStatusList) != set(jobList):
            return S_ERROR(EWMSSUBM, 'Requested jobs for bulk transaction are not valid')

        # Update status of all the requested jobs in one transaction
        result = gJobDB.setJobAttributes(jobUpdateStatusList,
                                         ['Status', 'MinorStatus'],
                                         ['Received', 'Job accepted'])
        if not result['OK']:
            return result

        self.__sendJobsToOptimizationMind(jobUpdateStatusList)
        return S_OK(jobUpdateStatusList)

    ###########################################################################
    def __checkIfProxyUploadIsRequired(self):
        """ Tell whether the user has to upload a proxy.

        :return: True when no suitable proxy is reported (validSeconds=18000) or
                 when the check itself failed, False otherwise
        """
        result = gProxyManager.userHasProxy(self.ownerDN, self.ownerGroup, validSeconds=18000)
        if not result['OK']:
            gLogger.error("Can't check if the user has proxy uploaded:", result['Message'])
            return True
        # Check if an upload is required
        return not result['Value']

    ###########################################################################
    @staticmethod
    def __getJobList(jobInput):
        """ Evaluate the jobInput into a list of ints

        :param jobInput: one or more job IDs in int or str form
        :type jobInput: str or int or list
        :return: a list of int job IDs; empty if the input cannot be interpreted
        """
        if isinstance(jobInput, int):
            return [jobInput]
        if isinstance(jobInput, basestring):
            try:
                return [int(jobInput)]
            except ValueError:
                return []
        if isinstance(jobInput, list):
            # Only conversion errors mean "bad input"; KeyboardInterrupt and
            # SystemExit must not be swallowed here.
            try:
                return [int(x) for x in jobInput]
            except (TypeError, ValueError, OverflowError):
                return []
        return []

    ###########################################################################
    types_rescheduleJob = []

    def export_rescheduleJob(self, jobIDs):
        """ Reschedule the jobs specified by jobIDs.

        Processing stops at the first job that cannot be rescheduled. Jobs that
        were rescheduled are always announced to OptimizationMind, including
        when the call ends with an error.

        :param jobIDs: a job ID (int or str) or a list of job IDs
        :return: S_OK(list of rescheduled job IDs) with the key
                 'requireProxyUpload'; S_ERROR carrying 'InvalidJobIDs' and/or
                 'NonauthorizedJobIDs' when some jobs were refused
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                      RIGHT_RESCHEDULE)
        rescheduledJobs = []
        for jobID in validJobList:
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            gLogger.debug(str(result))
            if not result['OK']:
                # The jobs rescheduled so far still have to reach the optimizers
                if rescheduledJobs:
                    self.__sendJobsToOptimizationMind(rescheduledJobs)
                return result
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')
            rescheduledJobs.append(jobID)

        # Notify before the error check below, otherwise a single refused ID in
        # the request would leave the rescheduled jobs unannounced.
        if rescheduledJobs:
            self.__sendJobsToOptimizationMind(rescheduledJobs)

        if invalidJobList or nonauthJobList:
            result = S_ERROR('Some jobs failed reschedule')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result

    @staticmethod
    def __deleteJob(jobID):
        """ Delete one job.

        The job is marked 'Deleted' and dropped from the TaskQueue. Every pilot
        associated with the job that has no jobs left is deleted as well,
        together with its pilots logging when enablePilotsLogging is set.

        :param int jobID: job identifier
        :return: S_OK() or the first failed result (a TaskQueue failure is only logged)
        """
        result = gJobDB.setJobStatus(jobID, 'Deleted', 'Checking accounting')
        if not result['OK']:
            return result

        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue')

        # if it was the last job for the pilot, clear PilotsLogging about it
        result = gPilotAgentsDB.getPilotsForJobID(jobID)
        if not result['OK']:
            gLogger.error("Failed to get Pilots for JobID", result['Message'])
            return result
        for pilot in result['Value']:
            res = gPilotAgentsDB.getJobsForPilot(pilot)
            if not res['OK']:
                gLogger.error("Failed to get jobs for pilot", res['Message'])
                return res
            if res['Value']:
                continue
            # the list of jobs for the pilot is empty: delete pilot and pilotslogging
            result = gPilotAgentsDB.getPilotInfo(pilotID=pilot)
            if not result['OK']:
                gLogger.error("Failed to get pilot info", result['Message'])
                return result
            # The pilot records live under 'Value'; indexing the result structure
            # itself with 0 can only fail.
            # NOTE(review): the payload is assumed to be either a mapping whose
            # values are per-pilot dicts or a sequence of such dicts, each holding
            # 'PilotJobReference' - confirm against PilotAgentsDB.getPilotInfo.
            pilotInfo = result['Value']
            pilotRecords = pilotInfo.values() if isinstance(pilotInfo, dict) else pilotInfo
            pilotRefs = [record['PilotJobReference'] for record in pilotRecords]
            ret = gPilotAgentsDB.deletePilot(pilot)
            if not ret['OK']:
                gLogger.error("Failed to delete pilot from PilotAgentsDB", ret['Message'])
                return ret
            if enablePilotsLogging:
                for pilotRef in pilotRefs:
                    ret = gPilotsLoggingDB.deletePilotsLogging(pilotRef)
                    if not ret['OK']:
                        gLogger.error("Failed to delete pilot logging from PilotAgentsDB", ret['Message'])
                        return ret

        return S_OK()

    @staticmethod
    def __killJob(jobID, sendKillCommand=True):
        """ Kill one job.

        :param int jobID: job identifier
        :param bool sendKillCommand: when False only the status is changed and
                                     no 'Kill' command is stored for the job
        :return: the failed result of the command, S_OK() otherwise (status and
                 TaskQueue failures are only logged)
        """
        if sendKillCommand:
            result = gJobDB.setJobCommand(jobID, 'Kill')
            if not result['OK']:
                return result

        gLogger.info('Job %d is marked for termination' % jobID)
        result = gJobDB.setJobStatus(jobID, 'Killed', 'Marked for termination')
        if not result['OK']:
            gLogger.warn('Failed to set job Killed status', result['Message'])
        result = gtaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            gLogger.warn('Failed to delete job from the TaskQueue', result['Message'])

        return S_OK()

    def __kill_delete_jobs(self, jobIDList, right):
        """ Kill or delete jobs as necessary.

        Depending on its current status a job is killed, only marked as killed
        or (unless right is RIGHT_KILL) deleted; jobs in 'Staging' status are
        additionally reported to the stager.

        :param jobIDList: a job ID (int or str) or a list of job IDs
        :param right: RIGHT_KILL or RIGHT_DELETE
        :return: S_OK(list of valid job IDs) with 'requireProxyUpload' and
                 optionally 'InvalidJobIDs'; S_ERROR carrying
                 'NonauthorizedJobIDs' and/or 'FailedJobIDs' otherwise
        """
        jobList = self.__getJobList(jobIDList)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDList))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList, right)

        # Get job status to see what is to be killed or deleted
        result = gJobDB.getAttributesForJobList(validJobList, ['Status'])
        if not result['OK']:
            return result

        killJobList = []
        deleteJobList = []
        markKilledJobList = []
        stagingJobList = []
        for jobID, sDict in result['Value'].items():
            status = sDict['Status']
            if status in ['Running', 'Matched', 'Stalled']:
                killJobList.append(jobID)
            elif status in ['Done', 'Failed', 'Killed']:
                # Finished jobs are left untouched by a kill request
                if right != RIGHT_KILL:
                    deleteJobList.append(jobID)
            else:
                markKilledJobList.append(jobID)
            if status in ['Staging']:
                stagingJobList.append(jobID)

        badIDs = []
        for jobID in markKilledJobList:
            result = self.__killJob(jobID, sendKillCommand=False)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in killJobList:
            result = self.__killJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        for jobID in deleteJobList:
            result = self.__deleteJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)

        if stagingJobList:
            stagerClient = StorageManagerClient()
            gLogger.info('Going to send killing signal to stager as well!')
            result = stagerClient.killTasksBySourceTaskID(stagingJobList)
            if not result['OK']:
                gLogger.warn('Failed to kill some Stager tasks: %s' % result['Message'])

        if nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed deletion')
            if nonauthJobList:
                gLogger.warn("Non-authorized JobIDs won't be deleted", str(nonauthJobList))
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                gLogger.warn("JobIDs failed to be deleted", str(badIDs))
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK(validJobList)
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()

        if invalidJobList:
            result['InvalidJobIDs'] = invalidJobList

        return result

    ###########################################################################
    types_deleteJob = []

    def export_deleteJob(self, jobIDs):
        """ Delete jobs specified in the jobIDs list

        :param jobIDs: a job ID (int or str) or a list of job IDs
        :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_DELETE)

    ###########################################################################
    types_killJob = []

    def export_killJob(self, jobIDs):
        """ Kill jobs specified in the jobIDs list

        :param jobIDs: a job ID (int or str) or a list of job IDs
        :return: S_OK/S_ERROR
        """
        return self.__kill_delete_jobs(jobIDs, RIGHT_KILL)

    ###########################################################################
    types_resetJob = []

    def export_resetJob(self, jobIDs):
        """ Reset jobs specified in the jobIDs list

        For every authorized job the RescheduleCounter attribute is set to -1
        and the job is rescheduled; successfully reset jobs are sent to
        OptimizationMind.

        :param jobIDs: a job ID (int or str) or a list of job IDs
        :return: S_OK() with the key 'requireProxyUpload'; S_ERROR carrying
                 'InvalidJobIDs', 'NonauthorizedJobIDs' and/or 'FailedJobIDs'
                 otherwise
        """
        jobList = self.__getJobList(jobIDs)
        if not jobList:
            return S_ERROR('Invalid job specification: ' + str(jobIDs))

        validJobList, invalidJobList, nonauthJobList, ownerJobList = self.jobPolicy.evaluateJobRights(jobList,
                                                                                                      RIGHT_RESET)

        badIDs = []
        good_ids = []
        for jobID in validJobList:
            result = gJobDB.setJobAttribute(jobID, 'RescheduleCounter', -1)
            if not result['OK']:
                badIDs.append(jobID)
                continue
            gtaskQueueDB.deleteJob(jobID)
            result = gJobDB.rescheduleJob(jobID)
            if not result['OK']:
                badIDs.append(jobID)
                continue
            good_ids.append(jobID)
            # Written for successful reschedules only: the record is built from
            # keys of the reschedule result.
            gJobLoggingDB.addLoggingRecord(result['JobID'], result['Status'], result['MinorStatus'],
                                           application='Unknown', source='JobManager')

        self.__sendJobsToOptimizationMind(good_ids)
        if invalidJobList or nonauthJobList or badIDs:
            result = S_ERROR('Some jobs failed resetting')
            if invalidJobList:
                result['InvalidJobIDs'] = invalidJobList
            if nonauthJobList:
                result['NonauthorizedJobIDs'] = nonauthJobList
            if badIDs:
                result['FailedJobIDs'] = badIDs
            return result

        result = S_OK()
        result['requireProxyUpload'] = len(ownerJobList) > 0 and self.__checkIfProxyUploadIsRequired()
        return result