def createNewJobs(self, wmbsJobs):
    """
    _createNewJobs_

    Record a batch of WMBS jobs in the BossAir database.

    Every WMBS job is converted into a RunJob.  A RunJob whose status is
    not one of self.states is given self.newState.  All RunJobs are then
    inserted with one DAO call and the transaction opened at the start of
    the method is committed.

    Accepts WMBS Jobs
    """
    myTransaction = self.beginTransaction()

    def _asRunJob(wmbsJob):
        """Convert one WMBS job, defaulting an unknown status to newState"""
        converted = RunJob()
        converted.buildFromJob(job=wmbsJob)
        if converted.get('status') not in self.states:
            converted['status'] = self.newState
        return converted

    # First turn wmbsJobs into runJobs
    runJobs = [_asRunJob(wmbsJob) for wmbsJob in wmbsJobs]

    # Next insert them into the database
    self.newJobDAO.execute(
        jobs=runJobs,
        conn=self.getDBConn(),
        transaction=self.existingTransaction(),
    )

    self.commitTransaction(myTransaction)
    return
def createNewJobs(self, wmbsJobs):
    """
    _createNewJobs_

    Record a batch of WMBS jobs in the BossAir database.

    Every WMBS job is converted into a RunJob.  A RunJob whose status is
    not one of self.states is given self.newState.  All RunJobs are then
    inserted with one DAO call and the transaction opened at the start of
    the method is committed.

    Accepts WMBS Jobs
    """
    myTransaction = self.beginTransaction()

    def _asRunJob(wmbsJob):
        """Convert one WMBS job, defaulting an unknown status to newState"""
        converted = RunJob()
        converted.buildFromJob(job=wmbsJob)
        if converted.get('status') not in self.states:
            converted['status'] = self.newState
        return converted

    # First turn wmbsJobs into runJobs
    runJobs = [_asRunJob(wmbsJob) for wmbsJob in wmbsJobs]

    # Next insert them into the database
    self.newJobDAO.execute(
        jobs=runJobs,
        conn=self.getDBConn(),
        transaction=self.existingTransaction(),
    )

    self.commitTransaction(myTransaction)
    return
def loadByWMBS(self, wmbsJobs):
    """
    _loadByWMBS_

    Load BossAir info based on wmbs Jobs.

    Returns a list of RunJobs, one per record returned by the DAO.  An
    empty input returns an empty list without querying the database.
    When fewer (or more) records come back than jobs were asked for, an
    error is logged, followed by one error line per WMBS job whose id is
    absent from the loaded records.
    """
    if len(wmbsJobs) == 0:
        return []

    records = self.loadByWMBSDAO.execute(
        jobs=wmbsJobs,
        conn=self.getDBConn(),
        transaction=self.existingTransaction(),
    )

    runJobs = []
    for record in records:
        runJob = RunJob()
        runJob.buildFromJob(record)
        runJobs.append(runJob)

    # Nothing to report when every requested job produced a record
    if len(runJobs) == len(wmbsJobs):
        return runJobs

    logging.error("Could not load all jobs in BossAir for WMBS input!")
    foundIds = [entry['jobid'] for entry in runJobs]
    for wmbsJob in wmbsJobs:
        if wmbsJob['id'] in foundIds:
            continue
        logging.error("Failed to retrieve wmbs_id %i and WMBS job info: %s",
                      wmbsJob['id'], wmbsJob)

    return runJobs
def testC_CheckWMBSBuildRoleAndGroup(self):
    """
    _CheckWMBSBuild_

    Trivial test that checks whether the usergroup and userrole fields
    survive building a RunJob from a WMBS job and building a WMBS job
    back from that RunJob
    """
    checkedFields = ('usergroup', 'userrole')

    # Create jobs
    wmbsJobs = []
    for index in range(10):
        wmbsJob = Job(name='Job_%i' % (index))
        wmbsJob['owner'] = "mnorman"
        wmbsJob['usergroup'] = "mygroup_%i" % index
        wmbsJob['userrole'] = "myrole_%i" % index
        wmbsJob['location'] = 'Xanadu'
        wmbsJobs.append(wmbsJob)

    for wmbsJob in wmbsJobs:
        runJob = RunJob()
        runJob.buildFromJob(job=wmbsJob)
        for field in checkedFields:
            self.assertEqual(wmbsJob[field], runJob[field])

        # Round trip back into a WMBS job
        rebuiltJob = runJob.buildWMBSJob()
        for field in checkedFields:
            self.assertEqual(wmbsJob[field], rebuiltJob[field])

    return
def testC_CheckWMBSBuildRoleAndGroup(self):
    """
    _CheckWMBSBuild_

    Trivial test that checks whether the usergroup and userrole fields
    survive building a RunJob from a WMBS job and building a WMBS job
    back from that RunJob
    """
    checkedFields = ('usergroup', 'userrole')

    # Create jobs
    wmbsJobs = []
    for index in range(10):
        wmbsJob = Job(name='Job_%i' % (index))
        wmbsJob['owner'] = "mnorman"
        wmbsJob['usergroup'] = "mygroup_%i" % index
        wmbsJob['userrole'] = "myrole_%i" % index
        wmbsJob['location'] = 'Xanadu'
        wmbsJobs.append(wmbsJob)

    for wmbsJob in wmbsJobs:
        runJob = RunJob()
        runJob.buildFromJob(job=wmbsJob)
        for field in checkedFields:
            self.assertEqual(wmbsJob[field], runJob[field])

        # Round trip back into a WMBS job
        rebuiltJob = runJob.buildWMBSJob()
        for field in checkedFields:
            self.assertEqual(wmbsJob[field], rebuiltJob[field])

    return
def _buildRunningJobs(self, wmbsJobs): """ _buildRunningJobs_ Build running jobs by loading information from the database and compiling it into a runJob object. This overwrites any information from the database with the info from the WMBS Job """ finalJobs = [] loadedJobs = self.loadByWMBS(wmbsJobs = wmbsJobs) if len(wmbsJobs) != len(loadedJobs): logging.error("Could not load all jobs in BossAir for WMBS input") for wmbsJob in wmbsJobs: for runJob in loadedJobs: if runJob['jobid'] == wmbsJob['id'] and runJob['retry_count'] == wmbsJob['retry_count']: rj = RunJob() rj.buildFromJob(wmbsJob) rj['id'] = runJob['id'] for key in rj.keys(): if rj[key] == None: rj[key] = runJob.get(key, None) finalJobs.append(rj) break # If we get here, we're sort of screwed # It means that although we sent for it, we couldn't find it. # Possibly means that the job just isn't in there yet. # Make a note of it, then do nothing logging.debug("Could not successfully load a runJob for wmbsJob %i:%i\n" % (wmbsJob['id'], wmbsJob['retry_count'])) logging.debug("WMBS Job: %s\n" % wmbsJob) return finalJobs
def _buildRunningJobs(self, wmbsJobs): """ _buildRunningJobs_ Build running jobs by loading information from the database and compiling it into a runJob object. This overwrites any information from the database with the info from the WMBS Job """ finalJobs = [] loadedJobs = self.loadByWMBS(wmbsJobs = wmbsJobs) if len(wmbsJobs) != len(loadedJobs): logging.error("Could not load all jobs in BossAir for WMBS input") for wmbsJob in wmbsJobs: for runJob in loadedJobs: if runJob['jobid'] == wmbsJob['id'] and runJob['retry_count'] == wmbsJob['retry_count']: rj = RunJob() rj.buildFromJob(wmbsJob) rj['id'] = runJob['id'] for key in rj.keys(): if rj[key] == None: rj[key] = runJob.get(key, None) finalJobs.append(rj) break # If we get here, we're sort of screwed # It means that although we sent for it, we couldn't find it. # Possibly means that the job just isn't in there yet. # Make a note of it, then do nothing logging.debug("Could not successfully load a runJob for wmbsJob %i:%i\n" % (wmbsJob['id'], wmbsJob['retry_count'])) logging.debug("WMBS Job: %s\n" % wmbsJob) return finalJobs
def loadByWMBS(self, wmbsJobs):
    """
    _loadByWMBS_

    Load BossAir info based on wmbs Jobs.

    Returns a list of RunJobs, one per record returned by the DAO.  An
    empty input returns an empty list without querying the database.
    When fewer (or more) records come back than jobs were asked for, an
    error is logged, followed by one error line per WMBS job whose id is
    absent from the loaded records.
    """
    if len(wmbsJobs) == 0:
        return []

    records = self.loadByWMBSDAO.execute(
        jobs=wmbsJobs,
        conn=self.getDBConn(),
        transaction=self.existingTransaction(),
    )

    runJobs = []
    for record in records:
        runJob = RunJob()
        runJob.buildFromJob(record)
        runJobs.append(runJob)

    # Nothing to report when every requested job produced a record
    if len(runJobs) == len(wmbsJobs):
        return runJobs

    logging.error("Could not load all jobs in BossAir for WMBS input!")
    foundIds = [entry['jobid'] for entry in runJobs]
    for wmbsJob in wmbsJobs:
        if wmbsJob['id'] in foundIds:
            continue
        logging.error("Failed to retrieve wmbs_id %i and WMBS job info: %s",
                      wmbsJob['id'], wmbsJob)

    return runJobs
def testB_CheckWMBSBuild(self):
    """
    _CheckWMBSBuild_

    Trivial test that checks whether we can build
    runJobs from WMBS jobs, and WMBS jobs back from runJobs,
    without losing the job id or the retry count
    """
    createdGroup = self.createJobs(nJobs=10)

    for wmbsJob in createdGroup.jobs:
        runJob = RunJob()
        runJob.buildFromJob(job=wmbsJob)
        # The WMBS 'id' is stored as 'jobid' on the RunJob
        self.assertEqual(wmbsJob['id'], runJob['jobid'])
        self.assertEqual(wmbsJob['retry_count'], runJob['retry_count'])

        rebuiltJob = runJob.buildWMBSJob()
        for field in ('id', 'retry_count'):
            self.assertEqual(wmbsJob[field], rebuiltJob[field])

    return
def testB_CheckWMBSBuild(self):
    """
    _CheckWMBSBuild_

    Trivial test that checks whether we can build
    runJobs from WMBS jobs, and WMBS jobs back from runJobs,
    without losing the job id or the retry count
    """
    createdGroup = self.createJobs(nJobs=10)

    for wmbsJob in createdGroup.jobs:
        runJob = RunJob()
        runJob.buildFromJob(job=wmbsJob)
        # The WMBS 'id' is stored as 'jobid' on the RunJob
        self.assertEqual(wmbsJob['id'], runJob['jobid'])
        self.assertEqual(wmbsJob['retry_count'], runJob['retry_count'])

        rebuiltJob = runJob.buildWMBSJob()
        for field in ('id', 'retry_count'):
            self.assertEqual(wmbsJob[field], rebuiltJob[field])

    return
def createJobs(self):
    """
    _createJobs_

    Create test jobs in WMBS and BossAir

    Builds one workflow, three filesets sharing a single file, three
    subscriptions (two Processing, one Merge) with one job group each,
    and ten jobs testJobA..testJobJ spread over the groups.  RunJobs are
    inserted for testJobB (PEND) and testJobE (RUN), and locations are set
    for jobs A, B, D, E, G and H.
    """
    labels = ("A", "B", "C")

    workflow = Workflow(spec=makeUUID(), owner="tapas",
                        name=makeUUID(), task="Test")
    workflow.create()

    filesets = {}
    for label, filesetName in zip(labels, ("TestFilesetA", "TestFilesetB", "TestFilesetC")):
        filesets[label] = Fileset(name=filesetName)
        filesets[label].create()

    inputFile = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    inputFile.create()

    for label in labels:
        filesets[label].addFile(inputFile)
        filesets[label].commit()

    # (label, subscription type, testSite1 white/black flag or None for no list)
    subscriptionPlan = (
        ("A", "Processing", True),
        ("B", "Processing", False),
        ("C", "Merge", None),
    )
    subscriptions = {}
    for label, subType, siteValid in subscriptionPlan:
        subscription = Subscription(fileset=filesets[label],
                                    workflow=workflow,
                                    type=subType)
        subscription.create()
        if siteValid is not None:
            subscription.addWhiteBlackList([{"site_name": "testSite1", "valid": siteValid}])
        subscriptions[label] = subscription

    jobGroups = {}
    for label in labels:
        jobGroups[label] = JobGroup(subscription=subscriptions[label])
        jobGroups[label].create()

    # (job name, job group, WMBS state, BossAir status or None, set cache_dir?)
    jobPlan = (
        # Site1, Has been assigned a location and is complete.
        ("testJobA", "A", "success", None, False),
        # Site 1, Has been assigned a location and is incomplete.
        ("testJobB", "A", "executing", "PEND", True),
        # Does not have a location, white listed to site 1
        ("testJobC", "A", "new", None, False),
        # Site 2, Has been assigned a location and is complete.
        ("testJobD", "B", "success", None, False),
        # Site 2, Has been assigned a location and is incomplete.
        ("testJobE", "B", "executing", "RUN", False),
        # Does not have a location, site 1 is blacklisted.
        ("testJobF", "B", "new", None, False),
        # Site 3, Has been assigned a location and is complete.
        ("testJobG", "C", "cleanout", None, False),
        # Site 3, Has been assigned a location and is incomplete.
        ("testJobH", "C", "new", None, False),
        # Site 3, Does not have a location.
        ("testJobI", "C", "new", None, False),
        # Site 3, Does not have a location and is in cleanout.
        ("testJobJ", "C", "cleanout", None, False),
    )

    allJobs = []
    jobsByName = {}
    runJobs = []
    for jobName, label, state, bossStatus, withCacheDir in jobPlan:
        wmbsJob = Job(name=jobName, files=[inputFile])
        wmbsJob["couch_record"] = makeUUID()
        if withCacheDir:
            wmbsJob["cache_dir"] = self.tempDir
        wmbsJob.create(group=jobGroups[label])
        wmbsJob["state"] = state
        if bossStatus is not None:
            runJob = RunJob()
            runJob.buildFromJob(wmbsJob)
            runJob["status"] = bossStatus
            runJobs.append(runJob)
        allJobs.append(wmbsJob)
        jobsByName[jobName] = wmbsJob

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=allJobs)

    self.insertRunJob.execute(runJobs)

    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    locationPlan = (
        ("testJobA", "testSite1"),
        ("testJobB", "testSite1"),
        ("testJobD", "testSite1"),
        ("testJobE", "testSite2"),
        ("testJobG", "testSite1"),
        ("testJobH", "testSite1"),
    )
    for jobName, siteName in locationPlan:
        setLocationAction.execute(jobsByName[jobName]["id"], siteName)

    return
def submit(self, jobs, info=None):
    """
    _submit_

    Submit jobs using the plugin

    Requires both plugin name and workflow user from submitter

    Deals internally in RunJob objects, but interfaces
    to the outside with WMBS Job analogs

    Jobs are converted to RunJobs and grouped by their 'plugin' field;
    each group is handed to the matching entry of self.plugins.  Groups
    naming a plugin that is not in self.plugins are logged and skipped.
    The jobs the plugins report as successful are then recorded through
    createNewJobs.

    Returns (successes, failures)

    WMExceptions propagate unchanged; any other exception raised while
    submitting or recording jobs is logged and re-raised as a
    BossAirException.
    """
    self.check()

    successJobs = []
    failureJobs = []

    # TODO: Add plugin and user to input via JobSubmitter
    # IMPORTANT IMPORTANT IMPORTANT

    # Put job into RunJob format
    pluginDict = {}
    for job in jobs:
        rj = RunJob()
        rj.buildFromJob(job=job)
        if not job.get('location', False):
            rj['location'] = job.get('custom', {}).get('location', None)
        pluginDict.setdefault(rj['plugin'], []).append(rj)
        # Can't add to the cache in submit()
        # It's NOT the same bossAir instance
        # self.jobs.append(rj)

    try:
        # Walk a snapshot of the keys: every batch is popped out of the
        # dictionary as it is handled, and mutating a dict while iterating
        # over it directly is an error.
        for plugin in list(pluginDict):
            jobsToSubmit = pluginDict.pop(plugin)

            if plugin not in self.plugins:
                # Then we have a non-existant plugin
                msg = "CRITICAL ERROR: Non-existant plugin!\n"
                msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
                msg += "Ignoring the jobs for this plugin for now"
                logging.error(msg)
                continue

            try:
                pluginInst = self.plugins[plugin]
                logging.debug("About to submit %i jobs to plugin %s",
                              len(jobsToSubmit), plugin)
                localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit,
                                                               info=info)
                for job in localSuccess:
                    successJobs.append(job.buildWMBSJob())
                for job in localFailure:
                    failureJobs.append(job.buildWMBSJob())
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
                logging.debug("Job info: %s\n", info)
                raise BossAirException(msg)
            finally:
                # make sure we release this memory; only this plugin's batch,
                # the other plugins still need theirs
                del jobsToSubmit[:]
    finally:
        # Drop whatever was not reached (e.g. after an exception)
        pluginDict.clear()

    # Create successful jobs in BossAir
    try:
        logging.debug("About to create %i new jobs in BossAir", len(successJobs))
        self.createNewJobs(wmbsJobs=successJobs)
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled error in creation of %i new jobs.\n" % len(successJobs)
        msg += str(ex)
        logging.error(msg)
        logging.debug("Job: %s", successJobs)
        raise BossAirException(msg)

    return successJobs, failureJobs
def submit(self, jobs, info=None):
    """
    _submit_

    Submit jobs using the plugin

    Requires both plugin name and workflow user from submitter

    Deals internally in RunJob objects, but interfaces
    to the outside with WMBS Job analogs

    Jobs are converted to RunJobs and grouped by their 'plugin' field;
    each group is handed to the matching entry of self.plugins.  Groups
    naming a plugin that is not in self.plugins are logged and skipped.

    Returns (successes, failures)

    WMExceptions propagate unchanged; any other exception raised by a
    plugin is logged and re-raised as a BossAirException.
    """
    self.check()

    successJobs = []
    failureJobs = []

    # TODO: Add plugin and user to input via JobSubmitter
    # IMPORTANT IMPORTANT IMPORTANT

    # Put job into RunJob format and figure out which plugin we need
    pluginDict = {}
    for job in jobs:
        rj = RunJob()
        rj.buildFromJob(job=job)
        if not job.get('location', False):
            rj['location'] = job.get('custom', {}).get('location', None)
        pluginDict.setdefault(rj['plugin'], []).append(rj)
        # Can't add to the cache in submit()
        # It's NOT the same bossAir instance
        # self.jobs.append(rj)

    for plugin, jobsToSubmit in pluginDict.items():
        if plugin not in self.plugins:
            # Then we have a non-existant plugin
            msg = "CRITICAL ERROR: Non-existant plugin!\n"
            msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
            msg += "Ignoring the jobs for this plugin for now"
            logging.error(msg)
            continue

        try:
            pluginInst = self.plugins[plugin]
            logging.debug("About to submit %i jobs to plugin %s",
                          len(jobsToSubmit), plugin)
            localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit,
                                                           info=info)
            for job in localSuccess:
                successJobs.append(job.buildWMBSJob())
            for job in localFailure:
                failureJobs.append(job.buildWMBSJob())
        except WMException:
            raise
        # "except X as name" is accepted by Python 2.6+ and Python 3;
        # the comma form is a syntax error on Python 3
        except Exception as ex:
            msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
            msg += str(ex)
            logging.error(msg)
            logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
            logging.debug("Job info: %s\n", info)
            raise BossAirException(msg)

    # Hand back the result promised by the docstring
    return successJobs, failureJobs
def submit(self, jobs, info=None):
    """
    _submit_

    Submit jobs using the plugin

    Requires both plugin name and workflow user from submitter

    Deals internally in RunJob objects, but interfaces
    to the outside with WMBS Job analogs

    Jobs are converted to RunJobs and grouped by their 'plugin' field;
    each group is handed to the matching entry of self.plugins.  Groups
    naming a plugin that is not in self.plugins are logged and skipped.

    Returns (successes, failures)

    WMExceptions propagate unchanged; any other exception raised by a
    plugin is logged and re-raised as a BossAirException.
    """
    self.check()

    successJobs = []
    failureJobs = []

    # TODO: Add plugin and user to input via JobSubmitter
    # IMPORTANT IMPORTANT IMPORTANT

    # Put job into RunJob format and figure out which plugin we need
    pluginDict = {}
    for job in jobs:
        rj = RunJob()
        rj.buildFromJob(job=job)
        if not job.get('location', False):
            rj['location'] = job.get('custom', {}).get('location', None)
        pluginDict.setdefault(rj['plugin'], []).append(rj)
        # Can't add to the cache in submit()
        # It's NOT the same bossAir instance
        # self.jobs.append(rj)

    for plugin, jobsToSubmit in pluginDict.items():
        if plugin not in self.plugins:
            # Then we have a non-existant plugin
            msg = "CRITICAL ERROR: Non-existant plugin!\n"
            msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
            msg += "Ignoring the jobs for this plugin for now"
            logging.error(msg)
            continue

        try:
            pluginInst = self.plugins[plugin]
            logging.debug("About to submit %i jobs to plugin %s",
                          len(jobsToSubmit), plugin)
            localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit,
                                                           info=info)
            for job in localSuccess:
                successJobs.append(job.buildWMBSJob())
            for job in localFailure:
                failureJobs.append(job.buildWMBSJob())
        except WMException:
            raise
        # "except X as name" is accepted by Python 2.6+ and Python 3;
        # the comma form is a syntax error on Python 3
        except Exception as ex:
            msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
            msg += str(ex)
            logging.error(msg)
            logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
            logging.debug("Job info: %s\n", info)
            raise BossAirException(msg)

    # Hand back the result promised by the docstring
    return successJobs, failureJobs
def testList(self):
    """
    _testList_

    Test the functions that list thresholds for creating jobs and
    submitting jobs.

    Two sites with Processing and Merge thresholds are inserted, ten jobs
    in various states are created (two of them with RunJobs in BossAir),
    and the output of listThresholdsForCreate and listThresholdsForSubmit
    is compared against the expected slot and job counts.
    """
    myResourceControl = ResourceControl()
    myResourceControl.insertSite("testSite1", 10, 20, "testSE1", "testCE1", "T1_US_FNAL", "LsfPlugin")
    myResourceControl.insertSite("testSite2", 20, 40, "testSE2", "testCE2")

    myResourceControl.insertThreshold("testSite1", "Processing", 20, 10)
    myResourceControl.insertThreshold("testSite1", "Merge", 200, 100)
    myResourceControl.insertThreshold("testSite2", "Processing", 50, 25)
    myResourceControl.insertThreshold("testSite2", "Merge", 135, 65)

    labels = ("A", "B", "C")

    testWorkflow = Workflow(spec=makeUUID(), owner="Steve",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    filesets = {}
    for label, filesetName in zip(labels, ("TestFilesetA", "TestFilesetB", "TestFilesetC")):
        filesets[label] = Fileset(name=filesetName)
        filesets[label].create()

    testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    testFileA.create()

    for label in labels:
        filesets[label].addFile(testFileA)
        filesets[label].commit()

    # (label, subscription type, testSite1 white/black flag or None for no list)
    subscriptionPlan = (
        ("A", "Processing", True),
        ("B", "Processing", False),
        ("C", "Merge", None),
    )
    subscriptions = {}
    for label, subType, siteValid in subscriptionPlan:
        subscription = Subscription(fileset=filesets[label],
                                    workflow=testWorkflow,
                                    type=subType)
        subscription.create()
        if siteValid is not None:
            subscription.addWhiteBlackList([{"site_name": "testSite1", "valid": siteValid}])
        subscriptions[label] = subscription

    jobGroups = {}
    for label in labels:
        jobGroups[label] = JobGroup(subscription=subscriptions[label])
        jobGroups[label].create()

    # (job name, job group, WMBS state, BossAir status or None)
    jobPlan = (
        # Site1, Has been assigned a location and is complete.
        ("testJobA", "A", "success", None),
        # Site 1, Has been assigned a location and is incomplete.
        ("testJobB", "A", "executing", "PEND"),
        # Does not have a location, white listed to site 1
        ("testJobC", "A", "new", None),
        # Site 2, Has been assigned a location and is complete.
        ("testJobD", "B", "success", None),
        # Site 2, Has been assigned a location and is incomplete.
        ("testJobE", "B", "executing", "RUN"),
        # Does not have a location, site 1 is blacklisted.
        ("testJobF", "B", "new", None),
        # Site 3, Has been assigned a location and is complete.
        ("testJobG", "C", "cleanout", None),
        # Site 3, Has been assigned a location and is incomplete.
        ("testJobH", "C", "new", None),
        # Site 3, Does not have a location.
        ("testJobI", "C", "new", None),
        # Site 3, Does not have a location and is in cleanout.
        ("testJobJ", "C", "cleanout", None),
    )

    allJobs = []
    jobsByName = {}
    runJobs = []
    for jobName, label, state, bossStatus in jobPlan:
        testJob = Job(name=jobName, files=[testFileA])
        testJob["couch_record"] = makeUUID()
        testJob.create(group=jobGroups[label])
        testJob["state"] = state
        if bossStatus is not None:
            runJob = RunJob()
            runJob.buildFromJob(testJob)
            runJob["status"] = bossStatus
            runJobs.append(runJob)
        allJobs.append(testJob)
        jobsByName[jobName] = testJob

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=allJobs)

    self.insertRunJob.execute(runJobs)

    # Every located job, including testJobE, is placed at testSite1 here
    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    for jobName in ("testJobA", "testJobB", "testJobD", "testJobE", "testJobG", "testJobH"):
        setLocationAction.execute(jobsByName[jobName]["id"], "testSite1")

    createThresholds = myResourceControl.listThresholdsForCreate()
    submitThresholds = myResourceControl.listThresholdsForSubmit()

    self.assertEqual(len(createThresholds.keys()), 2,
                     "Error: Wrong number of sites in create thresholds")

    self.assertEqual(createThresholds["testSite1"]["total_slots"], 10,
                     "Error: Wrong number of slots for site 1")

    self.assertEqual(createThresholds["testSite2"]["total_slots"], 20,
                     "Error: Wrong number of slots for site 2")

    # We should have two running jobs with locations at site one,
    # two running jobs without locations at site two, and one running
    # job without a location at site one and two.
    self.assertEqual(createThresholds["testSite1"]["pending_jobs"], 4,
                     "Error: Wrong number of pending jobs for site 1")

    # We should have one running job with a location at site 2 and
    # another running job without a location.
    self.assertEqual(createThresholds["testSite2"]["pending_jobs"], 2,
                     "Error: Wrong number of pending jobs for site 2")

    # We should also have a cms_name
    self.assertEqual(createThresholds["testSite1"]["cms_name"], "T1_US_FNAL")
    self.assertEqual(createThresholds["testSite2"]["cms_name"], None)

    # Collect the per-task thresholds of each site, keyed by (site, task type).
    # A later entry of the same task type replaces an earlier one.
    taskThresholds = {}
    for siteName, cmsName in (("testSite1", 'T1_US_FNAL'), ("testSite2", None)):
        self.assertEqual(submitThresholds[siteName]['cms_name'], cmsName)
        for threshold in submitThresholds[siteName]["thresholds"]:
            if threshold['task_type'] in ("Merge", "Processing"):
                taskThresholds[(siteName, threshold['task_type'])] = threshold

    self.assertEqual(submitThresholds["testSite1"]["total_running_jobs"], 1,
                     "Error: Wrong number of running jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite2"]["total_running_jobs"], 0,
                     "Error: Wrong number of running jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite1"]["total_pending_jobs"], 1,
                     "Error: Wrong number of pending jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite2"]["total_pending_jobs"], 0,
                     "Error: Wrong number of pending jobs for submit thresholds.")

    # (site, task type, expected task_running_jobs, expected task_pending_jobs)
    expectedTaskCounts = (
        ("testSite1", "Merge", 0, 0),
        ("testSite1", "Processing", 1, 1),
        ("testSite2", "Merge", 0, 0),
        ("testSite2", "Processing", 0, 0),
    )
    for siteName, taskType, running, pending in expectedTaskCounts:
        # Fail with a readable message instead of a lookup error when a
        # threshold is missing from the submit listing
        self.assertTrue((siteName, taskType) in taskThresholds,
                        "Error: Missing %s threshold for %s in submit thresholds."
                        % (taskType, siteName))
        threshold = taskThresholds[(siteName, taskType)]
        self.assertEqual(threshold["task_running_jobs"], running,
                         "Error: Wrong number of task running jobs for submit thresholds.")
        self.assertEqual(threshold["task_pending_jobs"], pending,
                         "Error: Wrong number of task pending jobs for submit thresholds.")

    return
def createJobs(self):
    """
    _createJobs_

    Create test jobs in WMBS and BossAir

    Builds one workflow, three filesets sharing a single file, three
    subscriptions (two Processing, one Merge) with one job group each,
    and ten jobs testJobA..testJobJ spread over the groups.  RunJobs are
    inserted for testJobB (PEND) and testJobE (RUN), and locations are set
    for jobs A, B, D, E, G and H.
    """
    labels = ("A", "B", "C")

    workflow = Workflow(spec=makeUUID(), owner="tapas",
                        name=makeUUID(), task="Test")
    workflow.create()

    filesets = {}
    for label, filesetName in zip(labels, ("TestFilesetA", "TestFilesetB", "TestFilesetC")):
        filesets[label] = Fileset(name=filesetName)
        filesets[label].create()

    inputFile = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    inputFile.create()

    for label in labels:
        filesets[label].addFile(inputFile)
        filesets[label].commit()

    # (label, subscription type, testSite1 white/black flag or None for no list)
    subscriptionPlan = (
        ("A", "Processing", True),
        ("B", "Processing", False),
        ("C", "Merge", None),
    )
    subscriptions = {}
    for label, subType, siteValid in subscriptionPlan:
        subscription = Subscription(fileset=filesets[label],
                                    workflow=workflow,
                                    type=subType)
        subscription.create()
        if siteValid is not None:
            subscription.addWhiteBlackList([{"site_name": "testSite1", "valid": siteValid}])
        subscriptions[label] = subscription

    jobGroups = {}
    for label in labels:
        jobGroups[label] = JobGroup(subscription=subscriptions[label])
        jobGroups[label].create()

    # (job name, job group, WMBS state, BossAir status or None, set cache_dir?)
    jobPlan = (
        # Site1, Has been assigned a location and is complete.
        ("testJobA", "A", "success", None, False),
        # Site 1, Has been assigned a location and is incomplete.
        ("testJobB", "A", "executing", "PEND", True),
        # Does not have a location, white listed to site 1
        ("testJobC", "A", "new", None, False),
        # Site 2, Has been assigned a location and is complete.
        ("testJobD", "B", "success", None, False),
        # Site 2, Has been assigned a location and is incomplete.
        ("testJobE", "B", "executing", "RUN", False),
        # Does not have a location, site 1 is blacklisted.
        ("testJobF", "B", "new", None, False),
        # Site 3, Has been assigned a location and is complete.
        ("testJobG", "C", "cleanout", None, False),
        # Site 3, Has been assigned a location and is incomplete.
        ("testJobH", "C", "new", None, False),
        # Site 3, Does not have a location.
        ("testJobI", "C", "new", None, False),
        # Site 3, Does not have a location and is in cleanout.
        ("testJobJ", "C", "cleanout", None, False),
    )

    allJobs = []
    jobsByName = {}
    runJobs = []
    for jobName, label, state, bossStatus, withCacheDir in jobPlan:
        wmbsJob = Job(name=jobName, files=[inputFile])
        wmbsJob["couch_record"] = makeUUID()
        if withCacheDir:
            wmbsJob["cache_dir"] = self.tempDir
        wmbsJob.create(group=jobGroups[label])
        wmbsJob["state"] = state
        if bossStatus is not None:
            runJob = RunJob()
            runJob.buildFromJob(wmbsJob)
            runJob["status"] = bossStatus
            runJobs.append(runJob)
        allJobs.append(wmbsJob)
        jobsByName[jobName] = wmbsJob

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=allJobs)

    self.insertRunJob.execute(runJobs)

    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    locationPlan = (
        ("testJobA", "testSite1"),
        ("testJobB", "testSite1"),
        ("testJobD", "testSite1"),
        ("testJobE", "testSite2"),
        ("testJobG", "testSite1"),
        ("testJobH", "testSite1"),
    )
    for jobName, siteName in locationPlan:
        setLocationAction.execute(jobsByName[jobName]["id"], siteName)

    return
def submit(self, jobs, info=None):
    """
    _submit_

    Submit jobs using the plugin

    Requires both plugin name and workflow user from submitter

    Deals internally in RunJob objects, but interfaces
    to the outside with WMBS Job analogs

    Jobs are converted to RunJobs and grouped by their 'plugin' field;
    each group is handed to the matching entry of self.plugins.  Groups
    naming a plugin that is not in self.plugins are logged and skipped.
    The jobs the plugins report as successful are then recorded through
    createNewJobs.

    Returns (successes, failures)

    WMExceptions propagate unchanged; any other exception raised while
    submitting or recording jobs is logged and re-raised as a
    BossAirException.
    """
    self.check()

    successJobs = []
    failureJobs = []

    # TODO: Add plugin and user to input via JobSubmitter
    # IMPORTANT IMPORTANT IMPORTANT

    # Put job into RunJob format
    pluginDict = {}
    for job in jobs:
        rj = RunJob()
        rj.buildFromJob(job=job)
        if not job.get('location', False):
            rj['location'] = job.get('custom', {}).get('location', None)
        pluginDict.setdefault(rj['plugin'], []).append(rj)
        # Can't add to the cache in submit()
        # It's NOT the same bossAir instance
        # self.jobs.append(rj)

    try:
        # Walk a snapshot of the keys: every batch is popped out of the
        # dictionary as it is handled, and mutating a dict while iterating
        # over it directly is an error.
        for plugin in list(pluginDict):
            jobsToSubmit = pluginDict.pop(plugin)

            if plugin not in self.plugins:
                # Then we have a non-existant plugin
                msg = "CRITICAL ERROR: Non-existant plugin!\n"
                msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
                msg += "Ignoring the jobs for this plugin for now"
                logging.error(msg)
                continue

            try:
                pluginInst = self.plugins[plugin]
                logging.debug("About to submit %i jobs to plugin %s",
                              len(jobsToSubmit), plugin)
                localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit,
                                                               info=info)
                for job in localSuccess:
                    successJobs.append(job.buildWMBSJob())
                for job in localFailure:
                    failureJobs.append(job.buildWMBSJob())
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
                logging.debug("Job info: %s\n", info)
                raise BossAirException(msg)
            finally:
                # make sure we release this memory; only this plugin's batch,
                # the other plugins still need theirs
                del jobsToSubmit[:]
    finally:
        # Drop whatever was not reached (e.g. after an exception)
        pluginDict.clear()

    # Create successful jobs in BossAir
    try:
        logging.debug("About to create %i new jobs in BossAir", len(successJobs))
        self.createNewJobs(wmbsJobs=successJobs)
    except WMException:
        raise
    except Exception as ex:
        msg = "Unhandled error in creation of %i new jobs.\n" % len(successJobs)
        msg += str(ex)
        logging.error(msg)
        logging.debug("Job: %s", successJobs)
        raise BossAirException(msg)

    return successJobs, failureJobs
def testList(self):
    """
    _testList_

    Test the functions that list thresholds for creating jobs and submitting
    jobs.

    Registers two sites with Processing and Merge thresholds, creates ten
    WMBS jobs across three subscriptions (two Processing, one Merge) plus
    two run jobs, and then verifies the output of
    listThresholdsForCreate() and listThresholdsForSubmit().
    """
    myResourceControl = ResourceControl()
    myResourceControl.insertSite("testSite1", 10, 20, "testSE1", "testCE1", "T1_US_FNAL", "LsfPlugin")
    myResourceControl.insertSite("testSite2", 20, 40, "testSE2", "testCE2")

    myResourceControl.insertThreshold("testSite1", "Processing", 20, 10)
    myResourceControl.insertThreshold("testSite1", "Merge", 200, 100)
    myResourceControl.insertThreshold("testSite2", "Processing", 50, 25)
    myResourceControl.insertThreshold("testSite2", "Merge", 135, 65)

    testWorkflow = Workflow(spec=makeUUID(), owner="Steve",
                            name=makeUUID(), task="Test")
    testWorkflow.create()

    testFilesetA = Fileset(name="TestFilesetA")
    testFilesetA.create()
    testFilesetB = Fileset(name="TestFilesetB")
    testFilesetB.create()
    testFilesetC = Fileset(name="TestFilesetC")
    testFilesetC.create()

    # A single file, available at both storage elements, shared by all filesets
    testFileA = File(lfn="testFileA", locations=set(["testSE1", "testSE2"]))
    testFileA.create()

    testFilesetA.addFile(testFileA)
    testFilesetA.commit()
    testFilesetB.addFile(testFileA)
    testFilesetB.commit()
    testFilesetC.addFile(testFileA)
    testFilesetC.commit()

    # Subscription A: Processing, testSite1 white listed
    testSubscriptionA = Subscription(fileset=testFilesetA,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionA.create()
    testSubscriptionA.addWhiteBlackList([{"site_name": "testSite1", "valid": True}])

    # Subscription B: Processing, testSite1 black listed
    testSubscriptionB = Subscription(fileset=testFilesetB,
                                     workflow=testWorkflow,
                                     type="Processing")
    testSubscriptionB.create()
    testSubscriptionB.addWhiteBlackList([{"site_name": "testSite1", "valid": False}])

    # Subscription C: Merge, no white/black list
    testSubscriptionC = Subscription(fileset=testFilesetC,
                                     workflow=testWorkflow,
                                     type="Merge")
    testSubscriptionC.create()

    testJobGroupA = JobGroup(subscription=testSubscriptionA)
    testJobGroupA.create()
    testJobGroupB = JobGroup(subscription=testSubscriptionB)
    testJobGroupB.create()
    testJobGroupC = JobGroup(subscription=testSubscriptionC)
    testJobGroupC.create()

    def makeJob(name, group, state):
        """Create a WMBS job on testFileA in `group` and tag it with `state`."""
        job = Job(name=name, files=[testFileA])
        job["couch_record"] = makeUUID()
        job.create(group=group)
        job["state"] = state
        return job

    def makeRunJob(job, status):
        """Build a RunJob from `job` and give it the requested status."""
        runJob = RunJob()
        runJob.buildFromJob(job)
        runJob["status"] = status
        return runJob

    # NOTE(review): the per-job comments below mention sites 2 and 3, but
    # every location assigned further down is "testSite1" - confirm intent.

    # Site1, Has been assigned a location and is complete.
    testJobA = makeJob("testJobA", testJobGroupA, "success")

    # Site 1, Has been assigned a location and is incomplete.
    testJobB = makeJob("testJobB", testJobGroupA, "executing")
    runJobB = makeRunJob(testJobB, "PEND")

    # Does not have a location, white listed to site 1
    testJobC = makeJob("testJobC", testJobGroupA, "new")

    # Site 2, Has been assigned a location and is complete.
    testJobD = makeJob("testJobD", testJobGroupB, "success")

    # Site 2, Has been assigned a location and is incomplete.
    testJobE = makeJob("testJobE", testJobGroupB, "executing")
    runJobE = makeRunJob(testJobE, "RUN")

    # Does not have a location, site 1 is blacklisted.
    testJobF = makeJob("testJobF", testJobGroupB, "new")

    # Site 3, Has been assigned a location and is complete.
    testJobG = makeJob("testJobG", testJobGroupC, "cleanout")

    # Site 3, Has been assigned a location and is incomplete.
    testJobH = makeJob("testJobH", testJobGroupC, "new")

    # Site 3, Does not have a location.
    testJobI = makeJob("testJobI", testJobGroupC, "new")

    # Site 3, Does not have a location and is in cleanout.
    testJobJ = makeJob("testJobJ", testJobGroupC, "cleanout")

    # Persist the states set above, then register the two run jobs
    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    changeStateAction.execute(jobs=[testJobA, testJobB, testJobC, testJobD,
                                    testJobE, testJobF, testJobG, testJobH,
                                    testJobI, testJobJ])

    self.insertRunJob.execute([runJobB, runJobE])

    setLocationAction = self.daoFactory(classname="Jobs.SetLocation")
    for locatedJob in (testJobA, testJobB, testJobD, testJobE, testJobG, testJobH):
        setLocationAction.execute(locatedJob["id"], "testSite1")

    createThresholds = myResourceControl.listThresholdsForCreate()
    submitThresholds = myResourceControl.listThresholdsForSubmit()

    self.assertEqual(len(createThresholds.keys()), 2,
                     "Error: Wrong number of sites in create thresholds")

    self.assertEqual(createThresholds["testSite1"]["total_slots"], 10,
                     "Error: Wrong number of slots for site 1")

    self.assertEqual(createThresholds["testSite2"]["total_slots"], 20,
                     "Error: Wrong number of slots for site 2")

    # We should have two running jobs with locations at site one,
    # two running jobs without locations at site two, and one running
    # job without a location at site one and two.
    self.assertEqual(createThresholds["testSite1"]["pending_jobs"], 4,
                     "Error: Wrong number of pending jobs for site 1")

    # We should have one running job with a location at site 2 and
    # another running job without a location.
    self.assertEqual(createThresholds["testSite2"]["pending_jobs"], 2,
                     "Error: Wrong number of pending jobs for site 2")

    # We should also have a cms_name
    self.assertEqual(createThresholds["testSite1"]["cms_name"], "T1_US_FNAL")
    self.assertEqual(createThresholds["testSite2"]["cms_name"], None)

    def pickThresholds(siteName):
        """Return the (Merge, Processing) submit thresholds of a site; None if absent."""
        merge = None
        processing = None
        for threshold in submitThresholds[siteName]["thresholds"]:
            if threshold['task_type'] == "Merge":
                merge = threshold
            elif threshold['task_type'] == "Processing":
                processing = threshold
        return merge, processing

    self.assertEqual(submitThresholds["testSite1"]['cms_name'], 'T1_US_FNAL')
    mergeThreshold1, procThreshold1 = pickThresholds("testSite1")

    self.assertEqual(submitThresholds["testSite2"]['cms_name'], None)
    mergeThreshold2, procThreshold2 = pickThresholds("testSite2")

    self.assertEqual(submitThresholds["testSite1"]["total_running_jobs"], 1,
                     "Error: Wrong number of running jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite2"]["total_running_jobs"], 0,
                     "Error: Wrong number of running jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite1"]["total_pending_jobs"], 1,
                     "Error: Wrong number of pending jobs for submit thresholds.")
    self.assertEqual(submitThresholds["testSite2"]["total_pending_jobs"], 0,
                     "Error: Wrong number of pending jobs for submit thresholds.")

    # The pending-job checks carry their own failure message so that a
    # failure report names the counter that is actually wrong.
    self.assertEqual(mergeThreshold1["task_running_jobs"], 0,
                     "Error: Wrong number of task running jobs for submit thresholds.")
    self.assertEqual(mergeThreshold1["task_pending_jobs"], 0,
                     "Error: Wrong number of task pending jobs for submit thresholds.")
    self.assertEqual(procThreshold1["task_running_jobs"], 1,
                     "Error: Wrong number of task running jobs for submit thresholds.")
    self.assertEqual(procThreshold1["task_pending_jobs"], 1,
                     "Error: Wrong number of task pending jobs for submit thresholds.")
    self.assertEqual(mergeThreshold2["task_running_jobs"], 0,
                     "Error: Wrong number of task running jobs for submit thresholds.")
    self.assertEqual(mergeThreshold2["task_pending_jobs"], 0,
                     "Error: Wrong number of task pending jobs for submit thresholds.")
    self.assertEqual(procThreshold2["task_running_jobs"], 0,
                     "Error: Wrong number of task running jobs for submit thresholds.")
    self.assertEqual(procThreshold2["task_pending_jobs"], 0,
                     "Error: Wrong number of task pending jobs for submit thresholds.")

    return