def execute( self ): """Main Agent code: 1.- Query TaskQueueDB for existing TQs 2.- Count Pending Jobs 3.- Submit Jobs """ self.__checkSubmitPools() bigDataJobsToSubmit = {} bigDataJobIdsToSubmit = {} for directorName, directorDict in self.directors.items(): self.log.verbose( 'Checking Director:', directorName ) self.log.verbose( 'RunningEndPoints:', directorDict['director'].runningEndPoints ) for runningEndPointName in directorDict['director'].runningEndPoints: runningEndPointDict = directorDict['director'].runningEndPoints[runningEndPointName] NameNode = runningEndPointDict['NameNode'] jobsByEndPoint = 0 result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Submitted', NameNode ) if result['OK']: jobsByEndPoint += len( result['Value'] ) result = BigDataDB.getBigDataJobsByStatusAndEndpoint( 'Running', NameNode ) if result['OK']: jobsByEndPoint += len( result['Value'] ) self.log.verbose( 'Checking Jobs By EndPoint %s:' % jobsByEndPoint ) jobLimitsEndPoint = runningEndPointDict['LimitQueueJobsEndPoint'] bigDataJobs = 0 if jobsByEndPoint >= jobLimitsEndPoint: self.log.info( '%s >= %s Running jobs reach job limits: %s, skipping' % ( jobsByEndPoint, jobLimitsEndPoint, runningEndPointName ) ) continue else: bigDataJobs = jobLimitsEndPoint - jobsByEndPoint requirementsDict = runningEndPointDict['Requirements'] self.log.info( 'Requirements Dict: ', requirementsDict ) result = taskQueueDB.getMatchingTaskQueues( requirementsDict ) if not result['OK']: self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] ) return result taskQueueDict = result['Value'] self.log.info( 'Task Queues Dict: ', taskQueueDict ) jobs = 0 priority = 0 cpu = 0 jobsID = 0 self.log.info( 'Pending Jobs from TaskQueue, which not matching before: ', self.pendingTaskQueueJobs ) for tq in taskQueueDict: jobs += taskQueueDict[tq]['Jobs'] priority += taskQueueDict[tq]['Priority'] cpu += taskQueueDict[tq]['Jobs'] * taskQueueDict[tq]['CPUTime'] #Matching of Jobs with BigData Softwares #This process is following the sequence: #Retrieve a job from taskqueueDict #Get job name and try to match with the resources #If not match store the var pendingTaskQueueJobs for the #next iteration # #This matching is doing with the following JobName Pattern # NameSoftware _ SoftwareVersion _ HighLanguageName _ HighLanguageVersion _ DataSetName #extract a job from the TaskQueue if tq not in self.pendingTaskQueueJobs.keys(): self.pendingTaskQueueJobs[tq] = {} getJobFromTaskQueue = taskQueueDB.matchAndGetJob( taskQueueDict[tq] ) if not getJobFromTaskQueue['OK']: self.log.error( 'Could not get Job and FromTaskQueue', getJobFromTaskQueue['Message'] ) return getJobFromTaskQueue jobInfo = getJobFromTaskQueue['Value'] jobID = jobInfo['jobId'] jobAttrInfo = jobDB.getJobAttributes( jobID ) if not jobAttrInfo['OK']: self.log.error( 'Could not get Job Attributes', jobAttrInfo['Message'] ) return jobAttrInfo jobInfoUniq = jobAttrInfo['Value'] jobName = jobInfoUniq['JobName'] self.pendingTaskQueueJobs[tq][jobID] = jobName result = jobDB.getJobJDL( jobID, True ) classAdJob = ClassAd( result['Value'] ) arguments = 0 if classAdJob.lookupAttribute( 'Arguments' ): arguments = classAdJob.getAttributeString( 'Arguments' ) #if not classAdJob.lookupAttribute( 'Arguments' ): # continue jobsToSubmit = self.matchingJobsForBDSubmission( arguments, runningEndPointName, runningEndPointDict['BigDataSoftware'], runningEndPointDict['BigDataSoftwareVersion'], runningEndPointDict['HighLevelLanguage']['HLLName'], runningEndPointDict['HighLevelLanguage']['HLLVersion'], jobID ) if ( jobsToSubmit == "OK" ): if directorName not in bigDataJobsToSubmit: bigDataJobsToSubmit[directorName] = {} if runningEndPointName not in bigDataJobsToSubmit[directorName]: bigDataJobsToSubmit[directorName][runningEndPointName] = {} bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobID, 'JobName': jobName, 'TQPriority': priority, 'CPUTime': cpu, 'BigDataEndpoint': runningEndPointName, 'BigDataEndpointNameNode': runningEndPointDict['NameNode'], 'BdSoftware': runningEndPointDict['BigDataSoftware'], 'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'], 'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'], 'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'], 'NumBigDataJobsAllowedToSubmit': bigDataJobs, 'SiteName': runningEndPointDict['SiteName'], 'PublicIP': runningEndPointDict['PublicIP'], 'User': runningEndPointDict['User'], 'Port': runningEndPointDict['Port'], 'UsePilot': runningEndPointDict['UsePilot'], 'IsInteractive': runningEndPointDict['IsInteractive'], 'Arguments': arguments } del self.pendingTaskQueueJobs[tq][jobID] else: self.log.error( jobsToSubmit ) self.log.info( 'Pending Jobs from TaskQueue, which not matching after: ', self.pendingTaskQueueJobs ) for tq in self.pendingTaskQueueJobs.keys(): for jobid in self.pendingTaskQueueJobs[tq].keys(): result = jobDB.getJobJDL( jobid, True ) classAdJob = ClassAd( result['Value'] ) arguments = 0 if classAdJob.lookupAttribute( 'Arguments' ): arguments = classAdJob.getAttributeString( 'Arguments' ) #if not classAdJob.lookupAttribute( 'Arguments' ): # continue #do the match with the runningEndPoint jobsToSubmit = self.matchingJobsForBDSubmission( arguments, runningEndPointName, runningEndPointDict['BigDataSoftware'], runningEndPointDict['BigDataSoftwareVersion'], runningEndPointDict['HighLevelLanguage']['HLLName'], runningEndPointDict['HighLevelLanguage']['HLLVersion'], jobid ) if ( jobsToSubmit == "OK" ): if directorName not in bigDataJobsToSubmit: bigDataJobsToSubmit[directorName] = {} if runningEndPointName not in bigDataJobsToSubmit[directorName]: bigDataJobsToSubmit[directorName][runningEndPointName] = {} bigDataJobsToSubmit[directorName][runningEndPointName] = { 'JobId': jobid, 'JobName': self.pendingTaskQueueJobs[tq][jobid], 'TQPriority': priority, 'CPUTime': cpu, 'BigDataEndpoint': runningEndPointName, 'BigDataEndpointNameNode': runningEndPointDict['NameNode'], 'BdSoftware': runningEndPointDict['BigDataSoftware'], 'BdSoftwareVersion': runningEndPointDict['BigDataSoftwareVersion'], 'HLLName' : runningEndPointDict['HighLevelLanguage']['HLLName'], 'HLLVersion' : runningEndPointDict['HighLevelLanguage']['HLLVersion'], 'NumBigDataJobsAllowedToSubmit': bigDataJobs, 'SiteName': runningEndPointDict['SiteName'], 'PublicIP': runningEndPointDict['PublicIP'], 'User': runningEndPointDict['User'], 'Port': runningEndPointDict['Port'], 'UsePilot': runningEndPointDict['UsePilot'], 'IsInteractive': runningEndPointDict['IsInteractive'], 'Arguments': arguments } del self.pendingTaskQueueJobs[tq][jobid] else: self.log.error( jobsToSubmit ) if not jobs and not self.pendingTaskQueueJobs: self.log.info( 'No matching jobs for %s found, skipping' % NameNode ) continue self.log.info( '___BigDataJobsTo Submit:', bigDataJobsToSubmit ) for directorName, JobsToSubmitDict in bigDataJobsToSubmit.items(): for runningEndPointName, jobsToSubmitDict in JobsToSubmitDict.items(): if self.directors[directorName]['isEnabled']: self.log.info( 'Requesting submission to %s of %s' % ( runningEndPointName, directorName ) ) director = self.directors[directorName]['director'] pool = self.pools[self.directors[directorName]['pool']] jobIDs = JobsToSubmitDict[runningEndPointName]['JobId'] jobName = JobsToSubmitDict[runningEndPointName]['JobName'] endpoint = JobsToSubmitDict[runningEndPointName]['BigDataEndpoint'] runningSiteName = JobsToSubmitDict[runningEndPointName]['SiteName'] NameNode = JobsToSubmitDict[runningEndPointName]['BigDataEndpointNameNode'] BigDataSoftware = JobsToSubmitDict[runningEndPointName]['BdSoftware'] BigDataSoftwareVersion = JobsToSubmitDict[runningEndPointName]['BdSoftwareVersion'] HLLName = JobsToSubmitDict[runningEndPointName]['HLLName'] HLLVersion = JobsToSubmitDict[runningEndPointName]['HLLVersion'] PublicIP = JobsToSubmitDict[runningEndPointName]['PublicIP'] User = JobsToSubmitDict[runningEndPointName]['User'] Port = JobsToSubmitDict[runningEndPointName]['Port'] UsePilot = JobsToSubmitDict[runningEndPointName]['UsePilot'] IsInteractive = JobsToSubmitDict[runningEndPointName]['IsInteractive'] Arguments = JobsToSubmitDict[runningEndPointName]['Arguments'] numBigDataJobsAllowed = JobsToSubmitDict[runningEndPointName]['NumBigDataJobsAllowedToSubmit'] ret = pool.generateJobAndQueueIt( director.submitBigDataJobs, args = ( endpoint, numBigDataJobsAllowed, runningSiteName, NameNode, BigDataSoftware, BigDataSoftwareVersion, HLLName, HLLVersion, PublicIP, Port, jobIDs, runningEndPointName, jobName, User, self.jobDataset, UsePilot, IsInteractive ), oCallback = self.callBack, oExceptionCallback = director.exceptionCallBack, blocking = False ) if not ret['OK']: # Disable submission until next iteration self.directors[directorName]['isEnabled'] = False else: time.sleep( self.am_getOption( 'ThreadStartDelay' ) ) if 'Default' in self.pools: # only for those in "Default' thread Pool # for pool in self.pools: self.pools['Default'].processAllResults() return DIRAC.S_OK()