def loadJobsFromList(self, idList): """ _loadJobsFromList_ Load jobs in bulk """ binds = [] for jobID in idList: binds.append({"jobid": jobID}) results = self.idLoad.execute(jobID = binds) # You have to have a list if isinstance(results, dict): results = [results] listOfJobs = [] for entry in results: # One job per entry tmpJob = Job(id = entry['id']) tmpJob.update(entry) listOfJobs.append(tmpJob) return listOfJobs
def loadData(self): """ _loadData_ Load all data that is associated with the jobgroup. This includes loading all the subscription information, the output fileset information and all the jobs that are associated with the group. """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() self.subscription.loadData() self.output.loadData() loadAction = self.daofactory(classname = "JobGroup.LoadJobs") result = loadAction.execute(self.id, conn = self.getDBConn(), transaction = self.existingTransaction()) self.jobs = [] self.newjobs = [] for jobID in result: newJob = Job(id = jobID["id"]) newJob.loadData() self.add(newJob) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return
def createSingleJobWorkflow(self): """ Create a workflow with one job and two files and store the results in instance variables """ self.testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") self.testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=self.testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() self.testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) self.testFileA.addRun(Run(1, *[45])) self.testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) self.testFileB.addRun(Run(1, *[46])) self.testFileA.create() self.testFileB.create() self.testJob = Job(name="TestJob", files=[self.testFileA, self.testFileB]) self.testJob.create(group=testJobGroup) self.testJob.associateFiles()
def loadJobsFromList(self, idList): """ _loadJobsFromList_ Load jobs in bulk """ loadAction = self.daoFactory(classname="Jobs.LoadFromID") getTypeAction = self.daoFactory(classname="Jobs.GetType") binds = [] for jobID in idList: binds.append({"jobid": jobID}) results = loadAction.execute(jobID=binds) typeResults = getTypeAction.execute(jobID=idList) subTypes = {} for typeEntry in typeResults: subTypes[typeEntry['id']] = typeEntry['type'] # You have to have a list if isinstance(results, dict): results = [results] listOfJobs = [] for entry in results: # One job per entry tmpJob = Job(id=entry['id']) tmpJob.update(entry) tmpJob['jobType'] = subTypes[entry['id']] listOfJobs.append(tmpJob) return listOfJobs
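All of the bulk-load helpers in this collection follow the same pattern: build one {"jobid": <id>} bind dictionary per job and hand the list to a DAO obtained from a DAOFactory. A minimal sketch of that pattern outside a class, assuming a WMCore component thread that already carries a logger and a database interface (the job IDs below are placeholders):

import threading

from WMCore.DAOFactory import DAOFactory

myThread = threading.currentThread()
daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger,
                        dbinterface=myThread.dbi)
loadAction = daoFactory(classname="Jobs.LoadFromID")

# One bind dictionary per job ID, exactly as loadJobsFromList builds them
binds = [{"jobid": jobID} for jobID in (1, 2, 3)]
results = loadAction.execute(jobID=binds)
# A single result may come back as a bare dict, so normalise to a list
if isinstance(results, dict):
    results = [results]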
def createTestJob(subscriptionType="Merge"): """ _createTestJob_ Create a test job with two files as input. This will also create the appropriate workflow, jobgroup and subscription. """ testWorkflow = Workflow(spec=makeUUID(), owner="Simon", name=makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subscriptionType) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(1, *[46])) testFileA.create() testFileB.create() testJob = Job(name=makeUUID(), files=[testFileA, testFileB]) testJob["couch_record"] = "somecouchrecord" testJob["location"] = "test.site.ch" testJob.create(group=testJobGroup) testJob.associateFiles() return testJob
def loadJobsFromListFull(self, idList): """ _loadJobsFromListFull_ Load jobs in bulk. Include the full metadata. """ binds = [] for jobID in idList: binds.append({"jobid": jobID}) results = self.loadAction.execute(jobID = binds) # You have to have a list if isinstance(results, dict): results = [results] listOfJobs = [] for entry in results: # One job per entry tmpJob = Job(id = entry['id']) tmpJob.update(entry) listOfJobs.append(tmpJob) return listOfJobs
def loadData(self): """ _loadData_ Load all data that is associated with the jobgroup. This includes loading all the subscription information, the output fileset information and all the jobs that are associated with the group. """ existingTransaction = self.beginTransaction() if self.id < 0 or self.uid == None: self.load() self.subscription.loadData() self.output.loadData() loadAction = self.daofactory(classname="JobGroup.LoadJobs") result = loadAction.execute(self.id, conn=self.getDBConn(), transaction=self.existingTransaction()) self.jobs = [] self.newjobs = [] for jobID in result: newJob = Job(id=jobID["id"]) newJob.loadData() self.add(newJob) WMJobGroup.commit(self) self.commitTransaction(existingTransaction) return
def loadJobsFromList(self, idList): """ _loadJobsFromList_ Load jobs in bulk """ loadAction = self.daoFactory(classname = "Jobs.LoadForErrorHandler") binds = [] for jobID in idList: binds.append({"jobid": jobID}) results = loadAction.execute(jobID = binds) # You have to have a list if isinstance(results, dict): results = [results] listOfJobs = [] for entry in results: # One job per entry tmpJob = Job(id = entry['id']) tmpJob.update(entry) listOfJobs.append(tmpJob) return listOfJobs
def loadJobsFromListFull(self, idList): """ _loadJobsFromListFull_ Load jobs in bulk. Include the full metadata. """ binds = [] for jobID in idList: binds.append({"jobid": jobID}) results = self.loadAction.execute(jobID=binds) # You have to have a list if isinstance(results, dict): results = [results] listOfJobs = [] for entry in results: # One job per entry tmpJob = Job(id=entry['id']) tmpJob.update(entry) listOfJobs.append(tmpJob) return listOfJobs
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None): """ _killWorkflow_ Kill a workflow that is already executing inside the agent. This will mark all incomplete jobs as failed and files that belong to all non-cleanup and non-logcollect subscriptions as failed. The name of the JSM couch database and the URL to the database must be passed in as well so the state transitions are logged. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow") killJobsAction = daoFactory(classname="Jobs.KillWorkflow") existingTransaction = False if myThread.transaction.conn: existingTransaction = True else: myThread.transaction.begin() killFilesAction.execute(workflowName=workflowName, conn=myThread.transaction.conn, transaction=True) liveJobs = killJobsAction.execute(workflowName=workflowName, conn=myThread.transaction.conn, transaction=True) changeState = ChangeState(jobCouchConfig) # Deal with any jobs that are running in the batch system # only works if we can start the API if bossAirConfig: bossAir = BossAirAPI(config=bossAirConfig, noSetup=True) killableJobs = [] for liveJob in liveJobs: if liveJob["state"].lower() == 'executing': # Then we need to kill this on the batch system liveWMBSJob = Job(id=liveJob["id"]) liveWMBSJob.update(liveJob) changeState.propagate(liveWMBSJob, "killed", liveJob["state"]) killableJobs.append(liveJob) # Now kill them try: bossAir.kill(jobs=killableJobs) except BossAirException as ex: # Something's gone wrong # Jobs not killed! logging.error("Error while trying to kill running jobs in workflow!\n") logging.error(str(ex)) trace = getattr(ex, 'traceback', '') logging.error(trace) # But continue; we need to kill the jobs in the master # the batch system will have to take care of itself. pass
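A hedged usage sketch for killWorkflow, assuming an agent-style configuration file (the path is hypothetical); the same configuration object carries both the JobStateMachine couch settings and the BossAir section, so it can be passed for both arguments:

from WMCore.Configuration import loadConfigurationFile

config = loadConfigurationFile("/path/to/agent-config.py")  # hypothetical path

# Omit bossAirConfig if jobs only need to be failed in WMBS/couch and not
# killed on the batch system.
killWorkflow("ExampleWorkflow", jobCouchConfig=config, bossAirConfig=config)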
def handleFailed(self, jobID, fwkJobReport): """ _handleFailed_ Handle a failed job. Update the job's metadata marking the outcome as 'failure' and incrementing the retry count. Mark all the files used as input for the job as failed. Finally, update the job's state. """ wmbsJob = Job(id=jobID) wmbsJob.load() outputID = wmbsJob.loadOutputID() wmbsJob["outcome"] = "failure" #wmbsJob.save() # We'll fake the rest of the state transitions here as the rest of the # WMAgent job submission framework is not yet complete. wmbsJob["fwjr"] = fwkJobReport outputMap = self.getOutputMapAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) jobType = self.getJobTypeAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) fileList = fwkJobReport.getAllFilesFromStep(step='logArch1') for fwjrFile in fileList: wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"], jobID=jobID, task=fwkJobReport.getTaskName()) merged = fwjrFile['merged'] moduleLabel = fwjrFile["module_label"] if merged: self.mergedOutputFiles.append(wmbsFile) self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputID }) outputFilesets = self.outputFilesetsForJob(outputMap, merged, moduleLabel) for outputFileset in outputFilesets: self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputFileset }) self._mapLocation(wmbsJob['fwjr']) self.listOfJobsToFail.append(wmbsJob) return
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig = None): """ _killWorkflow_ Kill a workflow that is already executing inside the agent. This will mark all incomplete jobs as failed and files that belong to all non-cleanup and non-logcollect subscriptions as failed. The name of the JSM couch database and the URL to the database must be passed in as well so the state transitions are logged. """ myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) killFilesAction = daoFactory(classname = "Subscriptions.KillWorkflow") killJobsAction = daoFactory(classname = "Jobs.KillWorkflow") existingTransaction = False if myThread.transaction.conn: existingTransaction = True else: myThread.transaction.begin() killFilesAction.execute(workflowName = workflowName, conn = myThread.transaction.conn, transaction = True) liveJobs = killJobsAction.execute(workflowName = workflowName, conn = myThread.transaction.conn, transaction = True) changeState = ChangeState(jobCouchConfig) # Deal with any jobs that are running in the batch system # only works if we can start the API if bossAirConfig: bossAir = BossAirAPI(config = bossAirConfig, noSetup = True) killableJobs = [] for liveJob in liveJobs: if liveJob["state"].lower() == 'executing': # Then we need to kill this on the batch system liveWMBSJob = Job(id = liveJob["id"]) liveWMBSJob.update(liveJob) changeState.propagate(liveWMBSJob, "killed", liveJob["state"]) killableJobs.append(liveJob) # Now kill them try: bossAir.kill(jobs = killableJobs) except BossAirException as ex: # Something's gone wrong # Jobs not killed! logging.error("Error while trying to kill running jobs in workflow!\n") logging.error(str(ex)) trace = getattr(ex, 'traceback', '') logging.error(trace) # But continue; we need to kill the jobs in the master # the batch system will have to take care of itself. pass
def testCreateDeleteExists(self): """ _testCreateDeleteExists_ Create and then delete a job. Use the job class's exists() method to determine if the job has been written to the database before it is created, after it has been created and after it has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) testWU = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[44])) self.assertFalse(testJob.exists(), "Job exists before it was created") self.assertFalse(testWU.exists(), "WorkUnit exists before it was created") testJob.create(group=testJobGroup) self.assertTrue(testJob.exists(), "Job does not exist after it was created") self.assertFalse(testWU.exists(), "WorkUnit exists when there is no work") # Test the getWorkflow method workflow = testJob.getWorkflow() self.assertEqual(workflow['task'], 'Test') self.assertEqual(workflow['name'], 'wf001') testJob.delete() self.assertFalse(testJob.exists(), "Job exists after it was deleted") self.assertFalse(testWU.exists(), "WorkUnit exists after job is deleted") return
def testSaveTransaction(self): """ _testSaveTransaction_ Create a job and a job mask and save them both to the database. Load the job from the database and verify that everything was written correctly. Begin a new transaction and update the job mask again. Load the mask and verify that it's correct. Finally, rollback the transaction and reload the mask to verify that it is in the correct state. """ testJobA = self.createTestJob() testJobA["mask"]["FirstEvent"] = 1 testJobA["mask"]["LastEvent"] = 2 testJobA["mask"]["FirstLumi"] = 3 testJobA["mask"]["LastLumi"] = 4 testJobA["mask"]["FirstRun"] = 5 testJobA["mask"]["LastRun"] = 6 testJobA.save() testJobB = Job(id=testJobA["id"]) testJobB.loadData() assert testJobA["mask"] == testJobB["mask"], "ERROR: Job mask did not load properly" myThread = threading.currentThread() myThread.transaction.begin() testJobA["mask"]["FirstEvent"] = 7 testJobA["mask"]["LastEvent"] = 8 testJobA["mask"]["FirstLumi"] = 9 testJobA["mask"]["LastLumi"] = 10 testJobA["mask"]["FirstRun"] = 11 testJobA["mask"]["LastRun"] = 12 testJobA["name"] = "stevesJob" testJobA["couch_record"] = "someCouchRecord" testJobA["location"] = "test2.site.ch" testJobA.save() testJobC = Job(id=testJobA["id"]) testJobC.loadData() assert testJobA["mask"] == testJobC["mask"], "ERROR: Job mask did not load properly" assert testJobC["name"] == "stevesJob", "ERROR: Job name did not save" assert testJobC["couch_record"] == "someCouchRecord", "ERROR: Job couch record did not save" assert testJobC["location"] == "test2.site.ch", "ERROR: Job site did not save" myThread.transaction.rollback() testJobD = Job(id=testJobA["id"]) testJobD.loadData() assert testJobB["mask"] == testJobD["mask"], "ERROR: Job mask did not load properly" return
def testListRunningJobs(self): """ _testListRunningJobs_ Test the ListRunningJobs DAO. """ testWorkflow = Workflow(spec = makeUUID(), owner = "Steve", name = makeUUID(), task="Test") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testJobA = Job(name = makeUUID(), files = []) testJobA["couch_record"] = makeUUID() testJobA.create(group = testJobGroup) testJobA["state"] = "executing" testJobB = Job(name = makeUUID(), files = []) testJobB["couch_record"] = makeUUID() testJobB.create(group = testJobGroup) testJobB["state"] = "complete" testJobC = Job(name = makeUUID(), files = []) testJobC["couch_record"] = makeUUID() testJobC.create(group = testJobGroup) testJobC["state"] = "new" changeStateAction = self.daoFactory(classname = "Jobs.ChangeState") changeStateAction.execute(jobs = [testJobA, testJobB, testJobC]) runningJobsAction = self.daoFactory(classname = "Monitoring.ListRunningJobs") runningJobs = runningJobsAction.execute() assert len(runningJobs) == 2, \ "Error: Wrong number of running jobs returned." for runningJob in runningJobs: if runningJob["job_name"] == testJobA["name"]: assert runningJob["state"] == testJobA["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobA["couch_record"], \ "Error: Running job has wrong couch record." else: assert runningJob["job_name"] == testJobC["name"], \ "Error: Running job has wrong name." assert runningJob["state"] == testJobC["state"], \ "Error: Running job has wrong state." assert runningJob["couch_record"] == testJobC["couch_record"], \ "Error: Running job has wrong couch record." return
def handleNeedsASO(self, jobID, fwkJobReport, fwkJobReportPath = None, asoOutputCount = 0): """ _handleNeedsASO_ If we see the FWJR asks for ASO, move to a set of ASO states before moving to success """ wmbsJob = Job(id = jobID) wmbsJob.load() self.listOfJobsNeedingASO.append(wmbsJob) return
def testDeleteTransaction(self): """ _testDeleteTransaction_ Create a new job and commit it to the database. Start a new transaction and delete the job from the database. Verify that the job has been deleted. After that, roll back the transaction and verify that the job is once again in the database. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) assert testJob.exists() == False, \ "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, \ "ERROR: Job does not exist after it was created" myThread = threading.currentThread() myThread.transaction.begin() testJob.delete() assert testJob.exists() == False, \ "ERROR: Job exists after it was deleted" myThread.transaction.rollback() assert testJob.exists() >= 0, \ "ERROR: Job does not exist after transaction was rolled back." return
def testGetGroupsByJobStateDAO(self): """ _testGetGroupsByJobStateDAO_ Verify that the GetGroupsByJobState DAO does what it is supposed to do. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = WMBSFileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroupA = JobGroup(subscription=testSubscription) testJobGroupA.create() testJobGroupB = JobGroup(subscription=testSubscription) testJobGroupB.create() testJobA = Job(name="TestJobA") testJobB = Job(name="TestJobB") testJobGroupA.add(testJobA) testJobGroupB.add(testJobB) testJobGroupA.commit() testJobGroupB.commit() myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) stateChangeAction = daofactory(classname="Jobs.ChangeState") testJobA["state"] = "complete" testJobB["state"] = "executing" stateChangeAction.execute(jobs=[testJobA, testJobB]) jobGroupAction = daofactory(classname="JobGroup.GetGroupsByJobState") jobGroups = jobGroupAction.execute(jobState="complete") assert len(jobGroups) == 1, \ "Error: Wrong number of job groups returned." assert jobGroups[0] == testJobGroupA.id, \ "Error: Wrong job group returned." return
def testCreateDeleteExistsNoFiles(self): """ _testCreateDeleteExistsNoFiles_ Create and then delete a job but don't add any input files to it. Use the job class's exists() method to determine if the job has been written to the database before it is created, after it has been created and after it has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="TestJob") assert testJob.exists() == False, "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, "ERROR: Job does not exist after it was created" testJob.delete() assert testJob.exists() == False, "ERROR: Job exists after it was deleted" return
def testLoad(self): """ _testLoad_ Create a job and save it to the database. Load it back from the database using the name and the id and then verify that all information was loaded correctly. """ testJobA = self.createTestJob() testJobB = Job(id=testJobA["id"]) testJobC = Job(name=testJobA["name"]) testJobB.load() testJobC.load() assert (testJobA["id"] == testJobB["id"]) and \ (testJobA["name"] == testJobB["name"]) and \ (testJobA["jobgroup"] == testJobB["jobgroup"]) and \ (testJobA["couch_record"] == testJobB["couch_record"]) and \ (testJobA["location"] == testJobB["location"]), \ "ERROR: Load from ID didn't load everything correctly" assert (testJobA["id"] == testJobC["id"]) and \ (testJobA["name"] == testJobC["name"]) and \ (testJobA["jobgroup"] == testJobC["jobgroup"]) and \ (testJobA["couch_record"] == testJobC["couch_record"]) and \ (testJobA["location"] == testJobC["location"]), \ "ERROR: Load from name didn't load everything correctly" self.assertEqual(testJobB['outcome'], 'failure') self.assertEqual(testJobC['outcome'], 'failure') self.assertEqual(testJobB['fwjr'], None) self.assertEqual(testJobC['fwjr'], None) return
def notestCreateDeleteExists(self): """ Create and then delete a job and workflow. Use the workunit class's exists() method to determine if the workunit has been written to the database before the job is created, after the job has been created, and after the workflow has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(1, *[46])) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) testWU1 = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[45])) testWU2 = WorkUnit(taskID=testWorkflow.id, fileid=testFileB['id'], runLumi=Run(1, *[46])) self.assertFalse(testWU1.exists(), "WorkUnit exists before job was created") self.assertFalse(testWU2.exists(), "WorkUnit exists before job was created") testJob.create(group=testJobGroup) self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job was created") self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job was created") testJob.delete() self.assertTrue(testWU1.exists(), "WorkUnit does not exist after job is deleted") self.assertTrue(testWU2.exists(), "WorkUnit does not exist after job is deleted") testWorkflow.delete() self.assertFalse(testWU1.exists(), "WorkUnit exists after workflow is deleted") self.assertFalse(testWU2.exists(), "WorkUnit exists after workflow is deleted") return
def handleFailed(self, jobID, fwkJobReport): """ _handleFailed_ Handle a failed job. Update the job's metadata marking the outcome as 'failure' and incrementing the retry count. Mark all the files used as input for the job as failed. Finally, update the job's state. """ wmbsJob = Job(id = jobID) wmbsJob.load() outputID = wmbsJob.loadOutputID() wmbsJob["outcome"] = "failure" #wmbsJob.save() # We'll fake the rest of the state transitions here as the rest of the # WMAgent job submission framework is not yet complete. wmbsJob["fwjr"] = fwkJobReport outputMap = self.getOutputMapAction.execute(jobID = jobID, conn = self.getDBConn(), transaction = self.existingTransaction()) jobType = self.getJobTypeAction.execute(jobID = jobID, conn = self.getDBConn(), transaction = self.existingTransaction()) fileList = fwkJobReport.getAllFilesFromStep(step = 'logArch1') if len(fileList) > 0: # Need task name info to proceed self.isTaskExistInFWJR(fwkJobReport, "failed") for fwjrFile in fileList: wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"], jobID = jobID, task = fwkJobReport.getTaskName()) merged = fwjrFile['merged'] moduleLabel = fwjrFile["module_label"] if merged: self.mergedOutputFiles.append(wmbsFile) self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputID}) outputFilesets = self.outputFilesetsForJob(outputMap, merged, moduleLabel) for outputFileset in outputFilesets: self.filesetAssoc.append({"lfn": wmbsFile["lfn"], "fileset": outputFileset}) self._mapLocation(wmbsJob['fwjr']) self.listOfJobsToFail.append(wmbsJob) return
def testLoadData(self): """ _testLoadData_ Create a job and save it to the database. Load it back from the database using the name and the id. Verify that all job information is correct including input files and the job mask. """ testJobA = self.createTestJob() testJobA["mask"]["FirstEvent"] = 1 testJobA["mask"]["LastEvent"] = 2 testJobA["mask"]["FirstLumi"] = 3 testJobA["mask"]["LastLumi"] = 4 testJobA["mask"]["FirstRun"] = 5 testJobA["mask"]["LastRun"] = 6 testJobA.save() testJobB = Job(id=testJobA["id"]) testJobC = Job(name=testJobA["name"]) testJobB.loadData() testJobC.loadData() assert ( (testJobA["id"] == testJobB["id"]) and (testJobA["name"] == testJobB["name"]) and (testJobA["jobgroup"] == testJobB["jobgroup"]) and (testJobA["couch_record"] == testJobB["couch_record"]) and (testJobA["location"] == testJobB["location"]) ), "ERROR: Load from ID didn't load everything correctly" assert ( (testJobA["id"] == testJobC["id"]) and (testJobA["name"] == testJobC["name"]) and (testJobA["jobgroup"] == testJobC["jobgroup"]) and (testJobA["couch_record"] == testJobC["couch_record"]) and (testJobA["location"] == testJobC["location"]) ), "ERROR: Load from name didn't load everything correctly" assert testJobA["mask"] == testJobB["mask"], "ERROR: Job mask did not load properly" assert testJobA["mask"] == testJobC["mask"], "ERROR: Job mask did not load properly" goldenFiles = testJobA.getFiles() for testFile in testJobB.getFiles(): assert testFile in goldenFiles, "ERROR: Job loaded an unknown file" goldenFiles.remove(testFile) assert len(goldenFiles) == 0, "ERROR: Job didn't load all files" goldenFiles = testJobA.getFiles() for testFile in testJobC.getFiles(): assert testFile in goldenFiles, "ERROR: Job loaded an unknown file" goldenFiles.remove(testFile) assert len(goldenFiles) == 0, "ERROR: Job didn't load all files" return
def createDummyJobs(self, nJobs, location=None): """ _createDummyJobs_ Create some dummy jobs """ if not location: location = self.sites[0] nameStr = makeUUID() testWorkflow = Workflow( spec=nameStr, owner="tapas", name=nameStr, task="basicWorkload/Production", owner_vogroup="phgroup", owner_vorole="cmsrole", ) testWorkflow.create() testFileset = Fileset(name=nameStr) testFileset.create() testSubscription = Subscription( fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased" ) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() jobList = [] for i in range(nJobs): testJob = Job(name="%s-%i" % (nameStr, i)) testJob["location"] = location testJob["custom"]["location"] = location testJob["userdn"] = "tapas" testJob["owner"] = "tapas" testJob["userrole"] = "cmsrole" testJob["usergroup"] = "phgroup" testJob.create(testJobGroup) jobList.append(testJob) return jobList
def findAllJobs(startDir=None): """ Find all jobs with cache on disk """ currentTime = time.time() if not os.path.isdir(startDir): msg = "Cannot locate jobCacheDir! Cannot reconstruct jobs!" logging.error(msg) raise Exception(msg) jobList = [] #Assume that we're totally hosed, but that all Created and later jobs are on disk #Hunt through disk for jobs for workflowDir in os.listdir(startDir): for taskDir in os.listdir('%s/%s' % (startDir, workflowDir)): #Now we are in the individual tasks, which should be filled with jobCollections tDir = '%s/%s/%s' % (startDir, workflowDir, taskDir) for jobCollection in os.listdir(tDir): for jobDir in os.listdir('%s/%s' % (tDir, jobCollection)): tmpJob = Job(id=int(jobDir.split('_')[1])) tmpJob['name'] = 'RecoveryJobAttempt_%i_%f' % ( tmpJob['id'], currentTime) tmpJob['cache_dir'] = '%s/%s/%s' % (tDir, jobCollection, jobDir) jobList.append(tmpJob) return jobList
def createTestJob(self, testSubscription, jobName, *testFiles): """ _createTestJob_ Create a test job with the given name and input files, attached to the given subscription. This will also create the appropriate job group. """ testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFiles = list(testFiles) testJob = Job(name=jobName, files=testFiles) testJob["couch_record"] = "somecouchrecord" testJob["location"] = "test.site.ch" testJob.create(group=testJobGroup)
def testC_CheckWMBSBuildRoleAndGroup(self): """ _CheckWMBSBuild_ Trivial test that checks whether we can build runJobs from WMBS jobs """ jobGroup = [] # Create jobs for jobId in range(10): testJob = Job(name='Job_%i' % (jobId)) testJob['owner'] = "mnorman" testJob['usergroup'] = "mygroup_%i" % jobId testJob['userrole'] = "myrole_%i" % jobId testJob['location'] = 'Xanadu' jobGroup.append(testJob) for job in jobGroup: rj = RunJob() rj.buildFromJob(job=job) self.assertEqual(job['usergroup'], rj['usergroup']) self.assertEqual(job['userrole'], rj['userrole']) job2 = rj.buildWMBSJob() self.assertEqual(job['usergroup'], job2['usergroup']) self.assertEqual(job['userrole'], job2['userrole']) return
def createDummyJobs(self, nJobs, location=None): """ _createDummyJobs_ Create some dummy jobs """ if not location: location = self.sites[0] nameStr = makeUUID() testWorkflow = Workflow(spec=nameStr, owner="tapas", name=nameStr, task="basicWorkload/Production", owner_vogroup='phgroup', owner_vorole='cmsrole') testWorkflow.create() testFileset = Fileset(name=nameStr) testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() jobList = [] for i in range(nJobs): testJob = Job(name='%s-%i' % (nameStr, i)) testJob['location'] = location testJob['custom']['location'] = location testJob['userdn'] = 'tapas' testJob['owner'] = 'tapas' testJob['userrole'] = 'cmsrole' testJob['usergroup'] = 'phgroup' testJob.create(testJobGroup) jobList.append(testJob) return jobList
def createTestJobGroup(self): """ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for i in range(0, self.nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) testJobGroup.commit() return testJobGroup
def testDeleteTransaction(self): """ _testDeleteTransaction_ Create a new job and commit it to the database. Start a new transaction and delete the job from the database. Verify that the job has been deleted. After that, roll back the transaction and verify that the job is once again in the database. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) assert testJob.exists() is False, \ "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, \ "ERROR: Job does not exist after it was created" myThread = threading.currentThread() myThread.transaction.begin() testJob.delete() assert testJob.exists() is False, \ "ERROR: Job exists after it was deleted" myThread.transaction.rollback() assert testJob.exists() >= 0, \ "ERROR: Job does not exist after transaction was rolled back." return
def notestTwoJobGroups(self): """ Test two job groups with a shared fileset. (Minimal part of testGetLocations which was failing) """ testWorkflow1 = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test1") testWorkflow1.create() testWMBSFileset1 = WMBSFileset(name="TestFileset1") testWMBSFileset1.create() testSubscription1 = Subscription(fileset=testWMBSFileset1, workflow=testWorkflow1) testSubscription1.create() testJobGroup1 = JobGroup(subscription=testSubscription1) testJobGroup1.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.create() testJobA = Job(name="TestJobA") testJobA.addFile(testFileA) testJobGroup1.add(testJobA) testJobGroup1.commit() testWorkflow2 = Workflow(spec="spec.xml", owner="Simon", name="wf002", task="Test2") testWorkflow2.create() testWMBSFileset2 = WMBSFileset(name="TestFileset1") testWMBSFileset2.create() testSubscription2 = Subscription(fileset=testWMBSFileset2, workflow=testWorkflow2) testSubscription2.create() testJobGroup2 = JobGroup(subscription=testSubscription2) testJobGroup2.create() testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10) testFileC.addRun(Run(10, *[12312])) testFileC.create() testJobA1 = Job(name="TestJobA1") testJobA1.addFile(testFileC) testJobGroup2.add(testJobA1) testJobGroup2.commit()
def getMinimalJob(): job = Job() job["task"] = "/ACDCTest/reco" job["workflow"] = "ACDCTest" job["location"] = "cmssrm.fnal.gov" job["owner"] = "cmsdataops" job["group"] = "cmsdataops" return job
def testAddChecksumsByLFN(self): """ _testAddChecksumsByLFN_ Tests for adding checksums by DAO by LFN """ testWorkflow = Workflow(spec="hello", owner="mnorman", name="wf001", task="basicWorkload/Production") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription( fileset=testFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased" ) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(1, *[45])) testFileA.create() testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(1, *[45])) testFileB.create() testJobA = Job() testJobA.create(group=testJobGroup) testJobA.associateFiles() parentAction = self.daofactory(classname="Files.AddChecksumByLFN") binds = [ {"lfn": testFileA["lfn"], "cktype": "cksum", "cksum": 101}, {"lfn": testFileA["lfn"], "cktype": "adler32", "cksum": 201}, {"lfn": testFileB["lfn"], "cktype": "cksum", "cksum": 101}, ] parentAction.execute(bulkList=binds) testFileC = File(id=testFileA["id"]) testFileC.loadData() testFileD = File(id=testFileB["id"]) testFileD.loadData() self.assertEqual(testFileC["checksums"], {"adler32": "201", "cksum": "101"}) self.assertEqual(testFileD["checksums"], {"cksum": "101"}) return
def createTestJobGroup(self): """ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() testWMBSFileset.addFile(testFileA) testWMBSFileset.addFile(testFileB) testWMBSFileset.commit() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() for _ in range(0, self.nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJobGroup.add(testJob) testJobGroup.commit() return testJobGroup
def createTestJobGroup(self, nJobs, subType="Processing", retryOnce=False): """ _createTestJobGroup_ Creates a group of several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name=makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type=subType) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() for _ in range(0, nJobs): testJob = Job(name=makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJobGroup.commit() if retryOnce: self.increaseRetry.execute(testJobGroup.jobs) return testJobGroup
def createLargerTestJobGroup(self, commitFlag=True): """ _createLargerTestJobGroup_ """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = WMBSFileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10) testFileC.addRun(Run(10, *[12312])) testFileC.setLocation("T2_CH_CERN") testFileC.setLocation("malpaquet") testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10) testFileD.addRun(Run(10, *[12312])) testFileD.setLocation("T2_CH_CERN") testFileD.setLocation("malpaquet") testFileC.create() testFileD.create() testJobA = Job(name="TestJobA1") testJobA.addFile(testFileC) testJobB = Job(name="TestJobB1") testJobB.addFile(testFileD) testJobGroup.add(testJobA) testJobGroup.add(testJobB) for i in range(0, 100): testJob = Job(name="TestJob%i" % (i)) testJob.addFile(testFileC) testJobGroup.add(testJob) if commitFlag: testJobGroup.commit() return testJobGroup
def testMask(self): """ _testMask_ Test the new mask setup """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102]) testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202]) testJob.create(group=testJobGroup) loadJob = Job(id=testJob.exists()) loadJob.loadData() runs = loadJob['mask'].getRunAndLumis() self.assertEqual(len(runs), 2) self.assertEqual(runs[100], [[101, 102]]) self.assertEqual(runs[200], [[201, 202]]) bigRun = Run(100, *[101, 102, 103, 104]) badRun = Run(300, *[1001, 1002]) result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun]) self.assertEqual(len(result), 1) alteredRun = result.pop() self.assertEqual(alteredRun.run, 100) self.assertEqual(alteredRun.lumis, [101, 102]) run0 = Run(300, *[1001, 1002]) run1 = Run(300, *[1001, 1002]) loadJob['mask'].filterRunLumisByMask([run0, run1]) return
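The mask calls exercised in testMask above (addRunAndLumis, getRunAndLumis, filterRunLumisByMask) can be sketched without a database, assuming the plain DataStructs Mask exposes the same run/lumi API that the WMBS job mask inherits from it; a sketch under that assumption, not part of the original tests:

from WMCore.DataStructs.Mask import Mask
from WMCore.DataStructs.Run import Run

mask = Mask()
mask.addRunAndLumis(run=100, lumis=[101, 102])
mask.addRunAndLumis(run=200, lumis=[201, 202])

print(mask.getRunAndLumis())  # expected: {100: [[101, 102]], 200: [[201, 202]]}

# Runs outside the mask are dropped and lumis are trimmed to the masked ranges
filtered = mask.filterRunLumisByMask([Run(100, 101, 102, 103, 104),
                                      Run(300, 1001, 1002)])
print(filtered)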
def verifyJobSuccess(self, jobID): """ _verifyJobSuccess_ Verify that the metadata for a successful job is correct. This will check the outcome, retry count and state. """ testJob = Job(id = jobID) testJob.load() assert testJob["state"] == "success", \ "Error: test job in wrong state: %s" % testJob["state"] assert testJob["retry_count"] == 0, \ "Error: test job has wrong retry count: %s" % testJob["retry_count"] assert testJob["outcome"] == "success", \ "Error: test job has wrong outcome: %s" % testJob["outcome"] return
def testCreateDeleteExists(self): """ _testCreateDeleteExists_ Create and then delete a job. Use the job class's exists() method to determine if the job has been written to the database before it is created, after it has been created and after it has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) # testWU = WorkUnit(taskID=testWorkflow.id, fileid=testFileA['id'], runLumi=Run(1, *[44])) self.assertFalse(testJob.exists(), "Job exists before it was created") # self.assertFalse(testWU.exists(), "WorkUnit exists before it was created") testJob.create(group=testJobGroup) self.assertTrue(testJob.exists(), "Job does not exist after it was created") # self.assertFalse(testWU.exists(), "WorkUnit exists when there is no work") # Test the getWorkflow method workflow = testJob.getWorkflow() self.assertEqual(workflow['task'], 'Test') self.assertEqual(workflow['name'], 'wf001') testJob.delete() self.assertFalse(testJob.exists(), "Job exists after it was deleted") # self.assertFalse(testWU.exists(), "WorkUnit exists after job is deleted") return
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site): """ _makeNJobs_ Make and return a WMBS Job and File This handles all those damn add-ons """ # Set the CacheDir cacheDir = os.path.join(self.testDir, 'CacheDir') for n in range(nJobs): # First make a file # site = self.sites[0] testFile = File(lfn="/singleLfn/%s/%s" % (name, n), size=1024, events=10) fileset.addFile(testFile) fileset.commit() location = None if isinstance(site, list): if len(site) > 0: location = site[0] else: location = site index = 0 for f in fileset.files: index += 1 testJob = Job(name='%s-%i' % (name, index)) testJob.addFile(f) testJob["location"] = location testJob["possiblePSN"] = set(site) if isinstance(site, list) else set([site]) testJob['task'] = task.getPathName() testJob['sandbox'] = task.data.input.sandbox testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl') testJob['mask']['FirstEvent'] = 101 testJob['priority'] = 101 testJob['numberOfCores'] = 1 jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index)) os.makedirs(jobCache) testJob.create(jobGroup) testJob['cache_dir'] = jobCache testJob.save() jobGroup.add(testJob) output = open(os.path.join(jobCache, 'job.pkl'), 'wb') pickle.dump(testJob, output) output.close() return testJob, testFile
def createDummyJobs(self, nJobs, location = None): """ _createDummyJobs_ Create some dummy jobs """ if not location: location = self.sites[0] nameStr = makeUUID() testWorkflow = Workflow(spec = nameStr, owner = "mnorman", name = nameStr, task="basicWorkload/Production", owner_vogroup = 'phgroup', owner_vorole = 'cmsrole') testWorkflow.create() testFileset = Fileset(name = nameStr) testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() jobList = [] for i in range(nJobs): testJob = Job(name = '%s-%i' % (nameStr, i)) testJob['location'] = location testJob['custom']['location'] = location testJob['userdn'] = 'mnorman' testJob['owner'] = 'mnorman' testJob['userrole'] = 'cmsrole' testJob['usergroup'] = 'phgroup' testJob.create(testJobGroup) jobList.append(testJob) return jobList
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site, bl = [], wl = []): """ _makeNJobs_ Make and return a WMBS Job and File This handles all those damn add-ons """ # Set the CacheDir cacheDir = os.path.join(self.testDir, 'CacheDir') for n in range(nJobs): # First make a file #site = self.sites[0] testFile = File(lfn = "/singleLfn/%s/%s" % (name, n), size = 1024, events = 10) if isinstance(site, list): for singleSite in site: testFile.setLocation(singleSite) else: testFile.setLocation(site) testFile.create() fileset.addFile(testFile) fileset.commit() index = 0 for f in fileset.files: index += 1 testJob = Job(name = '%s-%i' % (name, index)) testJob.addFile(f) testJob["location"] = f.getLocations()[0] testJob['task'] = task.getPathName() testJob['sandbox'] = task.data.input.sandbox testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl') testJob['mask']['FirstEvent'] = 101 testJob["siteBlacklist"] = bl testJob["siteWhitelist"] = wl testJob['priority'] = 101 testJob['multicoreEnabled'] = False testJob['numberOfCores'] = 1 jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index)) os.makedirs(jobCache) testJob.create(jobGroup) testJob['cache_dir'] = jobCache testJob.save() jobGroup.add(testJob) output = open(os.path.join(jobCache, 'job.pkl'), 'wb') pickle.dump(testJob, output) output.close() return testJob, testFile
def findFinishedJobs(self): """ _findFinishedJobs_ Will actually, surprisingly, find finished jobs (i.e., jobs either exhausted or successful) """ jobList = [] jobListAction = self.daoFactory(classname="Jobs.GetAllJobs") jobList1 = jobListAction.execute(state="success", limitRows=self.numberOfJobsToArchive) jobList2 = jobListAction.execute(state="exhausted", limitRows=self.numberOfJobsToArchive) jobList3 = jobListAction.execute(state="killed", limitRows=self.numberOfJobsToArchive) jobList.extend(jobList1) jobList.extend(jobList2) jobList.extend(jobList3) if len(jobList) == 0: # Then nothing is ready return [] # Put together a list of job IDs binds = [] for jobID in jobList: binds.append({"jobid": jobID}) results = self.loadAction.execute(jobID=binds) if not isinstance(results, list): results = [results] doneList = [] for entry in results: # One job per entry tmpJob = Job(id=entry['id']) tmpJob.update(entry) doneList.append(tmpJob) return doneList
def createTestJobGroup(self, nJobs, subType = "Processing", retryOnce = False): """ _createTestJobGroup_ Creates a group of several jobs """ testWorkflow = Workflow(spec = "spec.xml", owner = "Simon", name = makeUUID(), task="Test") testWorkflow.create() testWMBSFileset = Fileset(name = "TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow, type = subType) testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFileA.create() testFileB.create() for i in range(0, nJobs): testJob = Job(name = makeUUID()) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJobGroup.commit() if retryOnce: self.increaseRetry.execute(testJobGroup.jobs) return testJobGroup
def testParentageByJob(self): """ _testParentageByJob_ Tests the DAO that assigns parentage by Job """ testWorkflow = Workflow(spec = 'hello', owner = "mnorman", name = "wf001", task="basicWorkload/Production") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024, events = 20, checksums = {'cksum': 1}) testFileParentA.addRun(Run( 1, *[45])) testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024, events = 20, checksums = {'cksum': 1}) testFileParentB.addRun(Run( 1, *[45])) testFileParentA.create() testFileParentB.create() testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums = {'cksum':1}) testFileA.addRun(Run( 1, *[45])) testFileA.create() testJobA = Job() testJobA.create(group = testJobGroup) testJobA.addFile(testFileParentA) testJobA.addFile(testFileParentB) testJobA.associateFiles() parentAction = self.daofactory(classname = "Files.SetParentageByJob") parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']}) testFileB = File(id = testFileA["id"]) testFileB.loadData(parentage = 1) goldenFiles = [testFileParentA, testFileParentB] for parentFile in testFileB["parents"]: self.assertEqual(parentFile in goldenFiles, True, "ERROR: Unknown parent file") goldenFiles.remove(parentFile) self.assertEqual(len(goldenFiles), 0, "ERROR: Some parents are missing")
def findFinishedJobs(self): """ _findFinishedJobs_ Will actually, surprisingly, find finished jobs (i.e., jobs either exhausted or successful) """ jobList = [] jobListAction = self.daoFactory(classname = "Jobs.GetAllJobs") jobList1 = jobListAction.execute(state = "success") jobList2 = jobListAction.execute(state = "exhausted") jobList3 = jobListAction.execute(state = "killed") jobList.extend(jobList1) jobList.extend(jobList2) jobList.extend(jobList3) if len(jobList) == 0: # Then nothing is ready return [] # Put together a list of job IDs binds = [] for jobID in jobList: binds.append({"jobid": jobID}) results = self.loadAction.execute(jobID = binds) if not type(results) == list: results = [results] doneList = [] for entry in results: # One job per entry tmpJob = Job(id = entry['id']) tmpJob.update(entry) doneList.append(tmpJob) return doneList
def handleSuccessful(self, jobID, fwkJobReport, fwkJobReportPath=None): """ _handleSuccessful_ Handle a successful job, parsing the job report and updating the job in WMBS. """ wmbsJob = Job(id=jobID) wmbsJob.load() wmbsJob["outcome"] = "success" wmbsJob.getMask() outputID = wmbsJob.loadOutputID() wmbsJob["fwjr"] = fwkJobReport outputMap = self.getOutputMapAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) jobType = self.getJobTypeAction.execute( jobID=jobID, conn=self.getDBConn(), transaction=self.existingTransaction()) fileList = fwkJobReport.getAllFiles() for fwjrFile in fileList: wmbsFile = self.addFileToWMBS(jobType, fwjrFile, wmbsJob["mask"], jobID=jobID, task=fwkJobReport.getTaskName()) merged = fwjrFile['merged'] moduleLabel = fwjrFile["module_label"] if merged: self.mergedOutputFiles.append(wmbsFile) self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputID }) outputFilesets = self.outputFilesetsForJob(outputMap, merged, moduleLabel) for outputFileset in outputFilesets: self.filesetAssoc.append({ "lfn": wmbsFile["lfn"], "fileset": outputFileset }) # Only save once job is done, and we're sure we made it through okay self._mapLocation(wmbsJob['fwjr']) self.listOfJobsToSave.append(wmbsJob) #wmbsJob.save() return
def testGetFiles(self): """ _testGetFiles_ Test the Job's getFiles() method. This should load the files from the database if they haven't been loaded already. """ testJobA = self.createTestJob() testJobB = Job(id=testJobA["id"]) testJobB.loadData() goldenFiles = testJobA.getFiles() for testFile in testJobB.getFiles(): assert testFile in goldenFiles, "ERROR: Job loaded an unknown file: %s" % testFile goldenFiles.remove(testFile) assert len(goldenFiles) == 0, "ERROR: Job didn't load all files" return
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow) testSubscription.create() testFileA = File(lfn = makeUUID(), locations = "test.site.ch") testFileB = File(lfn = makeUUID(), locations = "test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testJob = Job() testJob.create(group = testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def createTestJobs(self, nJobs, cacheDir): """ _createTestJobs_ Create several jobs """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow, type="Processing", split_algo="FileBased") testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() # Create a file testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileA.create() baseName = makeUUID() # Now create a job for i in range(nJobs): testJob = Job(name='%s-%i' % (baseName, i)) testJob.addFile(testFileA) testJob['location'] = 'malpaquet' testJob['retry_count'] = 1 testJob['retry_max'] = 10 testJob.create(testJobGroup) testJob.save() testJobGroup.add(testJob) testJobGroup.commit() # Set test job caches for job in testJobGroup.jobs: job.setCache(cacheDir) return testJobGroup
def makeNJobs(self, name, task, nJobs, jobGroup, fileset, sub, site = None, bl = [], wl = []): """ _makeNJobs_ Make and return a WMBS Job and File This handles all those damn add-ons """ # Set the CacheDir cacheDir = os.path.join(self.testDir, 'CacheDir') for n in range(nJobs): # First make a file #site = self.sites[0] testFile = File(lfn = "/singleLfn/%s/%s" %(name, n), size = 1024, events = 10) if site: testFile.setLocation(site) else: for tmpSite in self.sites: testFile.setLocation('se.%s' % (tmpSite)) testFile.create() fileset.addFile(testFile) fileset.commit() index = 0 for f in fileset.files: index += 1 testJob = Job(name = '%s-%i' %(name, index)) testJob.addFile(f) testJob["location"] = f.getLocations()[0] testJob['custom']['location'] = f.getLocations()[0] testJob['task'] = task.getPathName() testJob['sandbox'] = task.data.input.sandbox testJob['spec'] = os.path.join(self.testDir, 'basicWorkload.pcl') testJob['mask']['FirstEvent'] = 101 testJob['owner'] = 'tapas' testJob["siteBlacklist"] = bl testJob["siteWhitelist"] = wl testJob['ownerDN'] = 'tapas' testJob['ownerRole'] = 'cmsrole' testJob['ownerGroup'] = 'phgroup' jobCache = os.path.join(cacheDir, 'Sub_%i' % (sub), 'Job_%i' % (index)) os.makedirs(jobCache) testJob.create(jobGroup) testJob['cache_dir'] = jobCache testJob.save() jobGroup.add(testJob) output = open(os.path.join(jobCache, 'job.pkl'),'wb') pickle.dump(testJob, output) output.close() return testJob, testFile
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def testGetFiles(self): """ _testGetFiles_ Test the Job's getFiles() method. This should load the files from the database if they haven't been loaded already. """ testJobA = self.createTestJob() testJobB = Job(id=testJobA["id"]) testJobB.loadData() goldenFiles = testJobA.getFiles() for testFile in testJobB.getFiles(): assert testFile in goldenFiles, \ "ERROR: Job loaded an unknown file: %s" % testFile goldenFiles.remove(testFile) assert not goldenFiles, "ERROR: Job didn't load all files" return
def testCreateDeleteExists(self): """ _testCreateDeleteExists_ Create and then delete a job. Use the job class's exists() method to determine if the job has been written to the database before it is created, after it has been created and after it has been deleted. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileA.create() testFileB.create() testJob = Job(name="TestJob", files=[testFileA, testFileB]) assert testJob.exists() == False, \ "ERROR: Job exists before it was created" testJob.create(group=testJobGroup) assert testJob.exists() >= 0, \ "ERROR: Job does not exist after it was created" testJob.delete() assert testJob.exists() == False, \ "ERROR: Job exists after it was deleted" return
def getJob(workload): """ getJob Given a workload, get a job from it """ job = Job() job["task"] = workload.getTask("reco").getPathName() job["workflow"] = workload.name() job["location"] = "T1_US_FNAL" job["owner"] = "evansde77" job["group"] = "DMWM" return job
def createTestJobGroup(self, commitFlag=True): """ _createTestJobGroup_ """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWMBSFileset = WMBSFileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation("goodse.cern.ch") testFileA.setLocation("malpaquet") testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation("goodse.cern.ch") testFileB.setLocation("malpaquet") testFileA.create() testFileB.create() testJobA = Job(name="TestJobA") testJobA.addFile(testFileA) testJobB = Job(name="TestJobB") testJobB.addFile(testFileB) testJobGroup.add(testJobA) testJobGroup.add(testJobB) if commitFlag: testJobGroup.commit() return testJobGroup