def test_matcher(self):
    """Integration test: submit a hello-world job, force it to 'Waiting',
    insert it into the task queue and verify the Matcher returns it for a
    matching resource description.

    NOTE(review): insert a proper DN to run the test.
    """
    resourceDescription = {
        'OwnerGroup': 'prod',
        'OwnerDN': '/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser/[email protected]',
        'DIRACVersion': 'pippo',
        'ReleaseVersion': 'blabla',
        'VirtualOrganization': 'LHCB',
        'PilotInfoReportedFlag': 'True',
        'PilotBenchmark': 'anotherPilot',
        'LHCbPlatform': 'CERTO',
        'Site': 'DIRAC.Jenkins.org',
        'CPUTime': 86400,
    }
    matcher = RPCClient('WorkloadManagement/Matcher')
    JobStateUpdate = RPCClient('WorkloadManagement/JobStateUpdate')
    wmsClient = WMSClient()

    job = helloWorldJob()
    job.setDestination('DIRAC.Jenkins.org')
    job.setInputData('/a/bbb')
    job.setType('User')
    jobDescription = createFile(job)
    res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
    # assertTrue replaces the long-deprecated assert_ alias
    self.assertTrue(res['OK'])
    jobID = res['Value']

    res = JobStateUpdate.setJobStatus(jobID, 'Waiting', 'matching', 'source')
    self.assertTrue(res['OK'])

    tqDB = TaskQueueDB()
    tqDefDict = {'OwnerDN': '/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser/[email protected]',
                 'OwnerGroup': 'prod',
                 'Setup': 'dirac-JenkinsSetup',
                 'CPUTime': 86400}
    res = tqDB.insertJob(jobID, tqDefDict, 10)
    self.assertTrue(res['OK'])

    res = matcher.requestJob(resourceDescription)
    # print() works on both Python 2 and 3; the bare print statement did not
    print(res)
    self.assertTrue(res['OK'])
    wmsClient.deleteJob(jobID)
def initializeJobManagerHandler(serviceInfo):
    """Instantiate the module-level DB objects used by the JobManager service."""
    global gJobDB, gJobLoggingDB, gtaskQueueDB
    gJobDB, gJobLoggingDB, gtaskQueueDB = JobDB(), JobLoggingDB(), TaskQueueDB()
    return S_OK()
def initializeMatcherHandler(serviceInfo):
    """ Matcher Service initialization """
    global gJobDB, gTaskQueueDB, jlDB, pilotAgentsDB

    gJobDB = JobDB()
    gTaskQueueDB = TaskQueueDB()
    jlDB = JobLoggingDB()
    pilotAgentsDB = PilotAgentsDB()

    # Monitoring activities published by the matcher
    for activity, description, unit, operation in (
            ('matchTime', "Job matching time", "secs", gMonitor.OP_MEAN),
            ('matchesDone', "Job Match Request", "matches", gMonitor.OP_RATE),
            ('matchesOK', "Matched jobs", "matches", gMonitor.OP_RATE),
            ('numTQs', "Number of Task Queues", "tqsk queues", gMonitor.OP_MEAN)):
        gMonitor.registerActivity(activity, description, 'Matching', unit, operation, 300)

    # Recompute TQ shares now and periodically; publish TQ counts every minute
    gTaskQueueDB.recalculateTQSharesForAll()
    gThreadScheduler.addPeriodicTask(120, gTaskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, sendNumTaskQueues)
    sendNumTaskQueues()

    return S_OK()
def __init__(self, pilotAgentsDB=None, jobDB=None, tqDB=None, jlDB=None, opsHelper=None):
    """c'tor

    Each DB/helper may be injected for testing; otherwise a fresh default
    instance is constructed (`x or Default()` matches the original
    truthiness check).
    """
    self.pilotAgentsDB = pilotAgentsDB or PilotAgentsDB()
    self.jobDB = jobDB or JobDB()
    self.tqDB = tqDB or TaskQueueDB()
    self.jlDB = jlDB or JobLoggingDB()
    self.opsHelper = opsHelper or Operations()

    self.log = gLogger.getSubLogger("Matcher")
    self.limiter = Limiter(jobDB=self.jobDB, opsHelper=self.opsHelper)
    self.siteClient = SiteStatus()
def initializeJobMonitoringHandler(serviceInfo):
    """Create the module-level DB objects shared by the JobMonitoring handler."""
    global jobDB, jobLoggingDB, taskQueueDB
    jobDB, jobLoggingDB, taskQueueDB = JobDB(), JobLoggingDB(), TaskQueueDB()
    return S_OK()
def export_getPilots(cls, jobID):
    """ Get pilot references and their states for :
        - those pilots submitted for the TQ where job is sitting
        - (or) the pilots executing/having executed the Job
    """
    jid = int(jobID)
    pilots = []

    result = pilotDB.getPilotsForJobID(jid)
    if result['OK']:
        pilots += result['Value']
    elif 'not found' not in result['Message']:
        # A genuine failure; a plain "not found" just means no direct pilots
        return S_ERROR('Failed to get pilot: ' + result['Message'])

    if not pilots:
        # Pilots were not found: try to look in the Task Queue
        result = TaskQueueDB().getTaskQueueForJob(jid)
        taskQueueID = result['Value'] if result['OK'] and result['Value'] else 0
        if taskQueueID:
            result = pilotDB.getPilotsForTaskQueue(taskQueueID, limit=10)
            if not result['OK']:
                return S_ERROR('Failed to get pilot: ' + result['Message'])
            pilots += result['Value']

    if not pilots:
        return S_ERROR('Failed to get pilot for Job %d' % jid)

    return pilotDB.getPilotInfo(pilotID=pilots)
def initialize(self):
    """ Sets defaults """
    self.am_setOption("PollingTime", 120)
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )

    agentTSTypes = self.am_getOption('ProductionTypes', [])
    self.prod_types = agentTSTypes if agentTSTypes else Operations().getValue(
        'Transformations/DataProcessing', ['MCSimulation', 'Merge'])
    gLogger.info(
        "Will exclude the following Production types from cleaning %s"
        % (', '.join(self.prod_types)))

    self.maxJobsAtOnce = self.am_getOption('MaxJobsAtOnce', 500)
    self.jobByJob = self.am_getOption('JobByJob', False)
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)

    # Per-status delays before removing finished jobs ('Any' = -1 means disabled)
    for status, default in (('Done', 7), ('Killed', 7), ('Failed', 7), ('Any', -1)):
        self.removeStatusDelay[status] = self.am_getOption('RemoveStatusDelay/%s' % status, default)

    return S_OK()
def initializeJobMonitoringHandler(serviceInfo):
    """Set up the global DB objects needed by the JobMonitoring service."""
    global gJobDB, gJobLoggingDB, gTaskQueueDB
    gJobDB, gJobLoggingDB, gTaskQueueDB = JobDB(), JobLoggingDB(), TaskQueueDB()
    return S_OK()
def checkDBAccess(cls):
    """Lazily create the shared DB handles; no-op once they exist."""
    if JobState.__db.checked:
        return
    JobState.__db.jobDB = JobDB()
    JobState.__db.logDB = JobLoggingDB()
    JobState.__db.tqDB = TaskQueueDB()
    JobState.__db.checked = True
def cleanTaskQueues():
    """Re-enable all task queues, then reschedule every orphan job found there.

    Per-job failures are logged and skipped so one bad job does not stop the
    whole cleanup; global failures abort immediately.
    """
    tqDB = TaskQueueDB()
    jobDB = JobDB()
    logDB = JobLoggingDB()

    res = tqDB.enableAllTaskQueues()
    if not res['OK']:
        return res
    res = tqDB.findOrphanJobs()
    if not res['OK']:
        return res

    for jid in res['Value']:
        ret = tqDB.deleteJob(jid)
        if not ret['OK']:
            gLogger.error("Cannot delete from TQ job %s" % jid, ret['Message'])
            continue
        ret = jobDB.rescheduleJob(jid)
        if not ret['OK']:
            gLogger.error("Cannot reschedule in JobDB job %s" % jid, ret['Message'])
            continue
        ret = logDB.addLoggingRecord(jid, JobStatus.RECEIVED, "", "", source="JobState")
        if not ret['OK']:
            gLogger.error("Cannot add logging record in JobLoggingDB %s" % jid, ret['Message'])

    return S_OK()
def initializeHandler(cls, svcInfoDict):
    """Create DB handles; enable the Elasticsearch job-parameters backend when configured."""
    cls.gJobDB = JobDB()
    cls.gJobLoggingDB = JobLoggingDB()
    cls.gTaskQueueDB = TaskQueueDB()
    useESForJobParametersFlag = Operations().getValue(
        '/Services/JobMonitoring/useESForJobParametersFlag', False)
    cls.gElasticJobParametersDB = ElasticJobParametersDB() if useESForJobParametersFlag else None
    return S_OK()
def initializeWMSAdministratorHandler(serviceInfo):
    """ WMS AdministratorService initialization """
    global jobDB, taskQueueDB
    jobDB = JobDB()
    taskQueueDB = TaskQueueDB()
    return S_OK()
def initializeHandler(cls, svcInfoDict):
    """ WMS AdministratorService initialization

    Creates the job and task-queue DB handles; the Elasticsearch
    job-parameters DB is only instantiated when enabled in Operations.
    """
    cls.jobDB = JobDB()
    cls.taskQueueDB = TaskQueueDB()
    useESForJobParametersFlag = Operations().getValue(
        '/Services/JobMonitoring/useESForJobParametersFlag', False)
    cls.elasticJobParametersDB = ElasticJobParametersDB() if useESForJobParametersFlag else None
    return S_OK()
def initializeWMSAdministratorHandler(serviceInfo):
    """ WMS AdministratorService initialization

    :param dict serviceInfo: service information dictionary

    :return: S_OK()/S_ERROR()
    """
    global jobDB, taskQueueDB
    jobDB, taskQueueDB = JobDB(), TaskQueueDB()
    return S_OK()
def initializeOptimizer(self):
    """Initialize specific parameters for TaskQueueAgent.

    :return: S_OK()/S_ERROR()
    """
    self.waitingStatus = self.am_getOption('WaitingStatus', 'Waiting')
    self.waitingMinorStatus = self.am_getOption('WaitingMinorStatus', 'Pilot Agent Submission')
    try:
        self.taskQueueDB = TaskQueueDB()
        result = self.taskQueueDB.enableAllTaskQueues()
        if not result['OK']:
            raise Exception("Can't enable TaskQueues: %s" % result['Message'])
    except Exception as e:  # 'except Exception, e' is Python-2-only syntax
        self.log.exception()
        return S_ERROR("Cannot initialize taskqueueDB: %s" % str(e))
    # The original fell off the end and returned None on success, which
    # breaks callers checking result['OK']; return S_OK explicitly.
    return S_OK()
def initializeJobManagerHandler(serviceInfo):
    """Create the DBs used by the JobManager; the PilotsLogging DB is optional."""
    global gJobDB, gJobLoggingDB, gtaskQueueDB, enablePilotsLogging, gPilotAgentsDB, gPilotsLoggingDB

    gJobDB = JobDB()
    gJobLoggingDB = JobLoggingDB()
    gtaskQueueDB = TaskQueueDB()
    gPilotAgentsDB = PilotAgentsDB()

    # there is a problem with accessing CS with shorter paths, so full path is extracted from serviceInfo dict
    csOption = serviceInfo['serviceSectionPath'].replace('JobManager', 'PilotsLogging') + '/Enable'
    enablePilotsLogging = gConfig.getValue(csOption, 'False').lower() in ('yes', 'true')
    if enablePilotsLogging:
        gPilotsLoggingDB = PilotsLoggingDB()

    return S_OK()
def test_matcher(self):
    """Submit a hello-world job, force it to WAITING, put it in the task
    queue, then verify the Matcher hands it out for a matching resource.

    NOTE: insert a proper DN to run the test.
    """
    resourceDescription = {
        "OwnerGroup": "prod",
        "OwnerDN": "/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser",
        "DIRACVersion": "pippo",
        "GridCE": "some.grid.ce.org",
        "ReleaseVersion": "blabla",
        "VirtualOrganization": "LHCb",
        "PilotInfoReportedFlag": "True",
        "PilotBenchmark": "anotherPilot",
        "Site": "DIRAC.Jenkins.ch",
        "CPUTime": 86400,
    }

    wmsClient = WMSClient()
    job = helloWorldJob()
    job.setDestination("DIRAC.Jenkins.ch")
    job.setInputData("/a/bbb")
    job.setType("User")

    submitRes = wmsClient.submitJob(job._toJDL(xmlFile=createFile(job)))
    self.assertTrue(submitRes["OK"], submitRes.get("Message"))
    jobID = submitRes["Value"]

    # forcing the update
    statusRes = JobStateUpdateClient().setJobStatus(
        jobID, JobStatus.WAITING, "matching", "source", None, True)
    self.assertTrue(statusRes["OK"], statusRes.get("Message"))

    tqDefDict = {
        "OwnerDN": "/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser",
        "OwnerGroup": "prod",
        "Setup": "dirac-JenkinsSetup",
        "CPUTime": 86400,
    }
    insertRes = TaskQueueDB().insertJob(jobID, tqDefDict, 10)
    self.assertTrue(insertRes["OK"], insertRes.get("Message"))

    matchRes = MatcherClient().requestJob(resourceDescription)
    print(matchRes)
    self.assertTrue(matchRes["OK"], matchRes.get("Message"))

    wmsClient.deleteJob(jobID)
def initializeMatcherHandler(serviceInfo):
    """ Matcher Service initialization """
    global gJobDB, gJobLoggingDB, gTaskQueueDB, gPilotAgentsDB

    # Create JobDB object and initialize its tables.
    gJobDB = JobDB()
    res = gJobDB._checkTable()
    if not res['OK']:
        return res

    # Create JobLoggingDB object and initialize its tables.
    gJobLoggingDB = JobLoggingDB()
    res = gJobLoggingDB._checkTable()
    if not res['OK']:
        return res

    gTaskQueueDB = TaskQueueDB()

    # Create PilotAgentsDB object and initialize its tables.
    gPilotAgentsDB = PilotAgentsDB()
    res = gPilotAgentsDB._checkTable()
    if not res['OK']:
        return res

    # Register the monitoring activities published by the matcher
    for activity, description, unit, operation in (
            ('matchTime', "Job matching time", "secs", gMonitor.OP_MEAN),
            ('matchesDone', "Job Match Request", "matches", gMonitor.OP_RATE),
            ('matchesOK', "Matched jobs", "matches", gMonitor.OP_RATE),
            ('numTQs', "Number of Task Queues", "tqsk queues", gMonitor.OP_MEAN)):
        gMonitor.registerActivity(activity, description, 'Matching', unit, operation, 300)

    gTaskQueueDB.recalculateTQSharesForAll()
    gThreadScheduler.addPeriodicTask(120, gTaskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, sendNumTaskQueues)
    sendNumTaskQueues()

    return S_OK()
def initializeHandler(cls, serviceInfoDict):
    """ Initialization of DB objects and OptimizationMind """
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()
    cls.taskQueueDB = TaskQueueDB()
    cls.pilotAgentsDB = PilotAgentsDB()

    # PilotsLoggingDB only exists when the flag is enabled in Operations
    enablePilotsLogging = Operations().getValue(
        '/Services/JobMonitoring/usePilotsLoggingFlag', False)
    cls.pilotsLoggingDB = PilotsLoggingDB() if enablePilotsLogging else None

    # Connect to the OptimizationMind now and retry every minute
    cls.msgClient = MessageClient("WorkloadManagement/OptimizationMind")
    cls.__connectToOptMind()
    gThreadScheduler.addPeriodicTask(60, cls.__connectToOptMind)
    return S_OK()
def initialize(self):
    """Sets defaults

    :return: S_OK()
    """
    self.am_setOption("PollingTime", 60)
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )
    self.prod_types = self.am_getOption('ProductionTypes', [
        'DataReconstruction', 'DataStripping', 'MCSimulation', 'Merge',
        'production'
    ])
    # str.join replaces string.join(), which was removed in Python 3
    gLogger.info(
        'Will exclude the following Production types from cleaning %s'
        % (', '.join(self.prod_types)))
    self.maxJobsAtOnce = self.am_getOption('MaxJobsAtOnce', 200)
    self.jobByJob = self.am_getOption('JobByJob', True)
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)
    return S_OK()
def initialize(self):
    """Sets defaults

    :return: S_OK()
    """
    self.am_setOption("PollingTime", 60)
    self.jobDB = JobDB()
    self.taskQueueDB = TaskQueueDB()
    self.jobLoggingDB = JobLoggingDB()
    # self.sandboxDB = SandboxDB( 'SandboxDB' )
    agentTSTypes = self.am_getOption('ProductionTypes', [])
    if agentTSTypes:
        self.prod_types = agentTSTypes
    else:
        self.prod_types = Operations().getValue(
            'Transformations/DataProcessing', ['MCSimulation', 'Merge'])
    # str.join replaces string.join(), which was removed in Python 3
    gLogger.info('Will exclude the following Production types from cleaning %s'
                 % (', '.join(self.prod_types)))
    self.maxJobsAtOnce = self.am_getOption('MaxJobsAtOnce', 200)
    self.jobByJob = self.am_getOption('JobByJob', True)
    self.throttlingPeriod = self.am_getOption('ThrottlingPeriod', 0.)
    return S_OK()
def initializeHandler(cls, serviceInfoDict):
    """Create DB handles, register matcher monitoring activities and
    schedule the periodic share/report tasks."""
    cls.jobDB = JobDB()
    cls.jobLoggingDB = JobLoggingDB()
    cls.taskQueueDB = TaskQueueDB()
    cls.pilotAgentsDB = PilotAgentsDB()
    cls.limiter = Limiter(jobDB=cls.jobDB)

    cls.taskQueueDB.recalculateTQSharesForAll()

    for activity, description, unit, operation in (
            ('matchTime', "Job matching time", "secs", gMonitor.OP_MEAN),
            ('matchesDone', "Job Match Request", "matches", gMonitor.OP_RATE),
            ('matchesOK', "Matched jobs", "matches", gMonitor.OP_RATE),
            ('numTQs', "Number of Task Queues", "tqsk queues", gMonitor.OP_MEAN)):
        gMonitor.registerActivity(activity, description, 'Matching', unit, operation, 300)

    gThreadScheduler.addPeriodicTask(120, cls.taskQueueDB.recalculateTQSharesForAll)
    gThreadScheduler.addPeriodicTask(60, cls.sendNumTaskQueues)
    cls.sendNumTaskQueues()
    return S_OK()
def initializeWMSAdministratorHandler(serviceInfo):
    """ WMS AdministratorService initialization

    :param dict serviceInfo: service information dictionary
    :return: S_OK()
    """
    global jobDB
    global pilotDB
    global taskQueueDB
    global enablePilotsLogging
    # BUG FIX: pilotsLoggingDB was missing from the global declarations, so
    # the PilotsLoggingDB() instance was bound to a discarded local name
    # (compare the JobManager initializer, which declares it global).
    global pilotsLoggingDB

    # there is a problem with accessing CS with shorter paths, so full path is extracted from serviceInfo dict
    enablePilotsLogging = gConfig.getValue(
        serviceInfo['serviceSectionPath'].replace(
            'WMSAdministrator', 'PilotsLogging') + '/Enable', 'False').lower() in (
        'yes', 'true')

    jobDB = JobDB()
    pilotDB = PilotAgentsDB()
    taskQueueDB = TaskQueueDB()
    if enablePilotsLogging:
        pilotsLoggingDB = PilotsLoggingDB()
    return S_OK()
def initializeMatcherHandler( serviceInfo ):
  """ Matcher Service initialization """
  global jobDB
  global jobLoggingDB
  global taskQueueDB

  jobDB = JobDB()
  jobLoggingDB = JobLoggingDB()
  taskQueueDB = TaskQueueDB()

  # Monitoring activities published by the matcher
  for activity, description, unit in ( ( 'matchTime', "Job matching time", "secs" ),
                                       ( 'matchTaskQueues', "Task queues checked per job", "task queues" ),
                                       ( 'matchesDone', "Job Matches", "matches" ),
                                       ( 'numTQs', "Number of Task Queues", "tqsk queues" ) ):
    gMonitor.registerActivity( activity, description, 'Matching', unit, gMonitor.OP_MEAN, 300 )

  # Recompute task-queue shares now and every two minutes
  taskQueueDB.recalculateTQSharesForAll()
  gThreadScheduler.addPeriodicTask( 120, taskQueueDB.recalculateTQSharesForAll )
  gThreadScheduler.addPeriodicTask( 120, sendNumTaskQueues )
  sendNumTaskQueues()

  return S_OK()
# File : dirac-admin-submit-pilot-for-job # Author : Ricardo Graciani ######################################################################## __RCSID__ = "$Id$" import sys import DIRAC from DIRAC.Core.Base import Script Script.parseCommandLine( ignoreErrors = True ) args = Script.getPositionalArgs() from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB from DIRAC.WorkloadManagementSystem.DB.JobDB import JobDB jobdb = JobDB() tqdb = TaskQueueDB() result = jobdb.selectJobs( { 'Status' : [ 'Received', 'Checking', 'Waiting' ] } ) if not result[ 'OK' ]: print result[ 'Message' ] sys.exit( 1 ) jobList = result[ 'Value' ] print tqdb.forceRecreationOfTables() for job in jobList: result = jobdb.getJobAttribute( job, 'RescheduleCounter' ) if not result[ 'OK' ]: print "Cannot get reschedule counter for job %s" % job rC = 0 rC = result[ 'Value' ] if rC >= jobdb.maxRescheduling: jobdb.setJobAttribute( job, "RescheduleCounter", "0" )
# print db.checkImageStatus( 'name', 'flavor'*10, 'requirements' ) ret = db.insertInstance( 'Image3', 'instance' ) print "insertInstance ", ret ret = db.insertInstance( 'Image2', 'instance' ) print "insertInstance ", ret if not ret['OK']: DIRAC.exit() print type( ret['Value'] ) print "declareInstanceSubmitted", db.declareInstanceSubmitted( ret['Value'] ) id1 = DIRAC.Time.toString() print "declareInstanceRunning ", db.declareInstanceRunning( 'Image3', id1, 'IP', 'ip' ) id2 = DIRAC.Time.toString() print "declareInstanceRunning ", db.declareInstanceRunning( 'Image2', id2, 'IP', 'ip' ) print "declareInstanceRunning ", db.instanceIDHeartBeat( id2, 1.0 ) for status in validStates: print "get%10sInstances " % status, db.getInstancesByStatus( status ) print "declareInstanceHalting ", db.declareInstanceHalting( id1, 0.0 ) print "declareInstanceHalting ", db.declareInstanceHalting( id2, 0.0 ) print "declareStalledInstances ", db.declareStalledInstances() print "declareStalledInstances ", db.declareStalledInstances() from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB tq = TaskQueueDB() print tq.retrieveTaskQueues()
def __init__(self, jid, source="Unknown"):
    """Remember the job id and its source tag, and open the WMS DB handles."""
    self.__jid = jid
    self.__source = str(source)
    # One handle per DB this object talks to
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()
    self.tqDB = TaskQueueDB()
def setUp(self):
    # Raise log verbosity and create a fresh TaskQueueDB handle for each test
    gLogger.setLevel('DEBUG')
    self.tqDB = TaskQueueDB()
Suggestion: for local testing, run this with::

    python -m pytest -c ../pytest.ini -vv tests/Integration/WorkloadManagementSystem/Test_TaskQueueDB.py
"""
from DIRAC import gLogger
from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()
from DIRAC.WorkloadManagementSystem.DB.TaskQueueDB import TaskQueueDB

gLogger.setLevel('DEBUG')
# Module-level DB handle shared by all the tests below
tqDB = TaskQueueDB()


def test_basicChain():
  """ a basic put - remove """
  tqDefDict = {'OwnerDN': '/my/DN', 'OwnerGroup': 'myGroup', 'Setup': 'aSetup', 'CPUTime': 50000}
  # Insert a dummy job (id 123, priority 10) and check it can be looked up
  result = tqDB.insertJob(123, tqDefDict, 10)
  assert result['OK'] is True
  result = tqDB.getTaskQueueForJobs([123])
  assert result['OK'] is True
def initialize(self):
    """just initialize TQDB

    :return: S_OK()
    """
    self.tqDB = TaskQueueDB()
    return S_OK()