def testMask(self): """ _testMask_ Test the new mask setup """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102]) testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202]) testJob.create(group=testJobGroup) loadJob = Job(id=testJob.exists()) loadJob.loadData() runs = loadJob['mask'].getRunAndLumis() self.assertEqual(len(runs), 2) self.assertEqual(runs[100], [[101, 102]]) self.assertEqual(runs[200], [[201, 202]]) bigRun = Run(100, *[101, 102, 103, 104]) badRun = Run(300, *[1001, 1002]) result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun]) self.assertEqual(len(result), 1) alteredRun = result.pop() self.assertEqual(alteredRun.run, 100) self.assertEqual(alteredRun.lumis, [101, 102]) run0 = Run(300, *[1001, 1002]) run1 = Run(300, *[1001, 1002]) loadJob['mask'].filterRunLumisByMask([run0, run1]) return
def test_AutoIncrementCheck(self): """ _AutoIncrementCheck_ Test and see whether we can find and set the auto_increment values """ myThread = threading.currentThread() if not myThread.dialect.lower() == "mysql": return testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck") incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 1) incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 2) incrementDAO.execute(input=10) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 11) incrementDAO.execute(input=5) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 12) return
def createSubscriptionWithFileABC(self): """" _createSubscriptionWithFileABC_ Create a subscription where the input fileset has three files. Also create a second subscription that has acquired two of the files. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWorkflow2 = Workflow(spec="specBOGUS.xml", owner="Simon", name="wfBOGUS", task="Test") testWorkflow2.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=20, locations=set(["test.site.ch"])) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=20, locations=set(["test.site.ch"])) testFileB.addRun(Run(1, *[46])) testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=20, locations=set(["test.site.ch"])) testFileC.addRun(Run(2, *[48])) testFileA.create() testFileB.create() testFileC.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testSubscription2 = Subscription(fileset=testFileset, workflow=testWorkflow2) testSubscription2.create() testSubscription2.acquireFiles([testFileA, testFileB]) #return (testSubscription, testFileset, testWorkflow, testFileA, # testFileB, testFileC) return (testSubscription, testFileA, testFileB, testFileC)
def createSubscriptionWithFileABC(self): """" _createSubscriptionWithFileABC_ Create a subscription where the input fileset has three files. Also create a second subscription that has acquired two of the files. """ testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test") testWorkflow.create() testWorkflow2 = Workflow(spec="specBOGUS.xml", owner="Simon", name="wfBOGUS", task="Test") testWorkflow2.create() testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=20, locations=set(["test.site.ch"])) testFileA.addRun(Run(1, *[45])) testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=20, locations=set(["test.site.ch"])) testFileB.addRun(Run(1, *[46])) testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=20, locations=set(["test.site.ch"])) testFileC.addRun(Run(2, *[48])) testFileA.create() testFileB.create() testFileC.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(testFileA) testFileset.addFile(testFileB) testFileset.addFile(testFileC) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testSubscription2 = Subscription(fileset=testFileset, workflow=testWorkflow2) testSubscription2.create() testSubscription2.acquireFiles([testFileA, testFileB]) # return (testSubscription, testFileset, testWorkflow, testFileA, # testFileB, testFileC) return (testSubscription, testFileA, testFileB, testFileC)
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec = "spec.xml", owner = "Steve", name = "wf001", task="Test") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow) testSubscription.create() testFileA = File(lfn = makeUUID(), locations = "test.site.ch") testFileB = File(lfn = makeUUID(), locations = "test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testJob = Job() testJob.create(group = testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def testLoadOutputID(self): """ _testLoadOutputID_ Test whether we can load an output ID for a job """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.loadOutputID(), testJobGroup.output.id) return
def createTestJobGroup(self, nJobs = 10, retry_count = 0, workloadPath = 'test'): """ Creates a group of several jobs """ myThread = threading.currentThread() myThread.transaction.begin() testWorkflow = Workflow(spec = workloadPath, owner = "Simon", name = "wf001", task="Test") testWorkflow.create() testWMBSFileset = Fileset(name = "TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFile0 = File(lfn = "/this/is/a/parent", size = 1024, events = 10) testFile0.addRun(Run(10, *[12312])) testFile0.setLocation('malpaquet') testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10) testFileA.addRun(Run(10, *[12312])) testFileA.setLocation('malpaquet') testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10) testFileB.addRun(Run(10, *[12312])) testFileB.setLocation('malpaquet') testFile0.create() testFileA.create() testFileB.create() testFileA.addParent(lfn = "/this/is/a/parent") testFileB.addParent(lfn = "/this/is/a/parent") for i in range(0, nJobs): testJob = Job(name = makeUUID()) testJob['retry_count'] = retry_count testJob['retry_max'] = 10 testJob['group'] = 'BadGuys' testJob['user'] = '******' testJob['taskType'] = 'Merge' #testJob['fwjr'] = myReport testJobGroup.add(testJob) testJob.create(group = testJobGroup) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob.save() testJobGroup.commit() testSubscription.acquireFiles(files = [testFileA, testFileB]) testSubscription.save() myThread.transaction.commit() return testJobGroup
def testCompleteJobInput(self): """ _testCompleteJobInput_ Verify the correct output of the CompleteInput DAO. This should mark the input for a job as complete once all the jobs that run over a particular file have complete successfully. """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve", name="wf002", task="Test") testWorkflow.create() bogusWorkflow.create() testFileset = Fileset(name="TestFileset") bogusFileset = Fileset(name="BogusFileset") testFileset.create() bogusFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) bogusSubscription = Subscription(fileset=bogusFileset, workflow=bogusWorkflow) testSubscription.create() bogusSubscription.create() testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileA.create() testFileB.create() testFileC.create() testFileset.addFile([testFileA, testFileB, testFileC]) bogusFileset.addFile([testFileA, testFileB, testFileC]) testFileset.commit() bogusFileset.commit() testSubscription.acquireFiles([testFileA, testFileB, testFileC]) bogusSubscription.acquireFiles([testFileA, testFileB, testFileC]) testJobGroup = JobGroup(subscription=testSubscription) bogusJobGroup = JobGroup(subscription=bogusSubscription) testJobGroup.create() bogusJobGroup.create() testJobA = Job(name="TestJobA", files=[testFileA]) testJobB = Job(name="TestJobB", files=[testFileA, testFileB]) testJobC = Job(name="TestJobC", files=[testFileC]) bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC]) testJobA.create(group=testJobGroup) testJobB.create(group=testJobGroup) testJobC.create(group=testJobGroup) bogusJob.create(group=bogusJobGroup) testJobA["outcome"] = "success" testJobB["outcome"] = "failure" testJobC["outcome"] = "success" testJobA.save() testJobB.save() testJobC.save() testJobA.completeInputFiles() compFiles = len(testSubscription.filesOfStatus("Completed")) assert compFiles == 0, \ "Error: test sub has wrong number of complete files: %s" % compFiles testJobB["outcome"] = "success" testJobB.save() testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]]) availFiles = len(testSubscription.filesOfStatus("Available")) assert availFiles == 0, \ "Error: test sub has wrong number of available files: %s" % availFiles acqFiles = len(testSubscription.filesOfStatus("Acquired")) assert acqFiles == 1, \ "Error: test sub has wrong number of acquired files: %s" % acqFiles compFiles = len(testSubscription.filesOfStatus("Completed")) assert compFiles == 1, \ "Error: test sub has wrong number of complete files: %s" % compFiles failFiles = len(testSubscription.filesOfStatus("Failed")) assert failFiles == 1, \ "Error: test sub has wrong number of failed files: %s" % failFiles availFiles = len(bogusSubscription.filesOfStatus("Available")) assert availFiles == 0, \ "Error: test sub has wrong number of available files: %s" % availFiles acqFiles = len(bogusSubscription.filesOfStatus("Acquired")) assert acqFiles == 3, \ "Error: test sub has wrong number of acquired files: %s" % acqFiles compFiles = len(bogusSubscription.filesOfStatus("Completed")) assert compFiles == 0, \ "Error: test sub has wrong number of complete files: %s" % compFiles failFiles = len(bogusSubscription.filesOfStatus("Failed")) assert failFiles == 0, \ "Error: test sub has wrong number of failed files: %s" % failFiles return
def testGetOutputMapDAO(self): """ _testGetOutputMapDAO_ Verify the proper behavior of the GetOutputMapDAO for a variety of different processing chains. """ recoOutputFileset = Fileset(name="RECO") recoOutputFileset.create() mergedRecoOutputFileset = Fileset(name="MergedRECO") mergedRecoOutputFileset.create() alcaOutputFileset = Fileset(name="ALCA") alcaOutputFileset.create() mergedAlcaOutputFileset = Fileset(name="MergedALCA") mergedAlcaOutputFileset.create() dqmOutputFileset = Fileset(name="DQM") dqmOutputFileset.create() mergedDqmOutputFileset = Fileset(name="MergedDQM") mergedDqmOutputFileset.create() cleanupFileset = Fileset(name="Cleanup") cleanupFileset.create() testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="None") testWorkflow.create() testWorkflow.addOutput("output", recoOutputFileset, mergedRecoOutputFileset) testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset, mergedAlcaOutputFileset) testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset) testWorkflow.addOutput("output", cleanupFileset) testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset) testWorkflow.addOutput("DQM", cleanupFileset) testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve", name="TestRecoMergeWF", task="None") testRecoMergeWorkflow.create() testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset, mergedRecoOutputFileset) testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve", name="TestRecoProcWF", task="None") testRecoProcWorkflow.create() testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve", name="TestAlcaChildWF", task="None") testAlcaChildWorkflow.create() inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000, locations="cmssrm.fnal.gov") inputFile.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(inputFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") testMergeRecoSubscription = Subscription( fileset=recoOutputFileset, workflow=testRecoMergeWorkflow, split_algo="WMBSMergeBySize", type="Merge") testProcRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoProcWorkflow, split_algo="FileBased", type="Processing") testChildAlcaSubscription = Subscription( fileset=alcaOutputFileset, workflow=testAlcaChildWorkflow, split_algo="FileBased", type="Processing") testSubscription.create() testMergeRecoSubscription.create() testProcRecoSubscription.create() testChildAlcaSubscription.create() testSubscription.acquireFiles() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="SplitJobA", files=[inputFile]) testJob.create(group=testJobGroup) testJob["state"] = "complete" testJob.save() outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap") outputMap = outputMapAction.execute(jobID=testJob["id"]) assert len(outputMap.keys()) == 3, \ "Error: Wrong number of outputs for primary workflow." goldenMap = { "output": (recoOutputFileset.id, mergedRecoOutputFileset.id), "ALCARECOStreamCombined": (alcaOutputFileset.id, mergedAlcaOutputFileset.id), "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id) } for outputID in outputMap.keys(): for outputFilesets in outputMap[outputID]: if outputFilesets["merged_output_fileset"] == None: self.assertEqual(outputFilesets["output_fileset"], cleanupFileset.id, "Error: Cleanup fileset is wrong.") continue self.assertTrue(outputID in goldenMap.keys(), "Error: Output identifier is missing.") self.assertEqual(outputFilesets["output_fileset"], goldenMap[outputID][0], "Error: Output fileset is wrong.") self.assertEqual(outputFilesets["merged_output_fileset"], goldenMap[outputID][1], "Error: Merged output fileset is wrong.") del goldenMap[outputID] self.assertEqual(len(goldenMap.keys()), 0, "Error: Missing output maps.") return
def test_AutoIncrementCheck(self): """ _AutoIncrementCheck_ Test and see whether we can find and set the auto_increment values """ myThread = threading.currentThread() if not myThread.dialect.lower() == 'mysql': return testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") testWorkflow.create() testFileset = Fileset(name="TestFileset") testFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) testSubscription.create() testFileA = File(lfn=makeUUID(), locations="test.site.ch") testFileB = File(lfn=makeUUID(), locations="test.site.ch") testFileA.create() testFileB.create() testFileset.addFile([testFileA, testFileB]) testFileset.commit() testSubscription.acquireFiles([testFileA, testFileB]) testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck") incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 1) incrementDAO.execute() testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 2) incrementDAO.execute(input=10) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 11) incrementDAO.execute(input=5) testJob = Job() testJob.create(group=testJobGroup) self.assertEqual(testJob.exists(), 12) return
def testFailJobInput(self): """ _testFailJobInput_ Test the Jobs.FailInput DAO and verify that it doesn't affect other jobs/subscriptions that run over the same files. """ testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test") bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve", name="wf002", task="Test") testWorkflow.create() bogusWorkflow.create() testFileset = Fileset(name="TestFileset") bogusFileset = Fileset(name="BogusFileset") testFileset.create() bogusFileset.create() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow) bogusSubscription = Subscription(fileset=bogusFileset, workflow=bogusWorkflow) testSubscription.create() bogusSubscription.create() testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN") testFileA.create() testFileB.create() testFileC.create() testFileset.addFile([testFileA, testFileB, testFileC]) bogusFileset.addFile([testFileA, testFileB, testFileC]) testFileset.commit() bogusFileset.commit() testSubscription.completeFiles([testFileA, testFileB, testFileC]) bogusSubscription.acquireFiles([testFileA, testFileB, testFileC]) testJobGroup = JobGroup(subscription=testSubscription) bogusJobGroup = JobGroup(subscription=bogusSubscription) testJobGroup.create() bogusJobGroup.create() testJobA = Job(name="TestJobA", files=[testFileA, testFileB, testFileC]) testJobB = Job(name="TestJobB", files=[testFileA, testFileB, testFileC]) bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC]) testJobA.create(group=testJobGroup) testJobB.create(group=testJobGroup) bogusJob.create(group=bogusJobGroup) testJobA.failInputFiles() testJobB.failInputFiles() self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0) self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0) self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3) self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0) changeStateAction = self.daoFactory(classname="Jobs.ChangeState") testJobB["state"] = "cleanout" changeStateAction.execute([testJobB]) # Try again testJobA.failInputFiles() # Should now be failed self.assertEqual(len(testSubscription.filesOfStatus("Available")), 0) self.assertEqual(len(testSubscription.filesOfStatus("Acquired")), 0) self.assertEqual(len(testSubscription.filesOfStatus("Failed")), 3) self.assertEqual(len(testSubscription.filesOfStatus("Completed")), 0) # bogus should be unchanged self.assertEqual(len(bogusSubscription.filesOfStatus("Available")), 0) self.assertEqual(len(bogusSubscription.filesOfStatus("Acquired")), 3) self.assertEqual(len(bogusSubscription.filesOfStatus("Failed")), 0) self.assertEqual(len(bogusSubscription.filesOfStatus("Completed")), 0) return
class EndOfRunTest(unittest.TestCase): """ _EndOfRun_t_ End of run splitting test """ def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute(siteName = 's1', seName = "somese.cern.ch") locationAction.execute(siteName = 's2', seName = "otherse.cern.ch") self.multipleFileFileset = Fileset(name = "TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = "somese.cern.ch") newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name = "TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size = 1000, events = 100, locations = "somese.cern.ch") newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name = "TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = "somese.cern.ch") newFile.addRun(Run(1, *[45+i/3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name = "TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = "somese.cern.ch") newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() self.multipleSiteFileset = Fileset(name = "TestFileset5") self.multipleSiteFileset.create() for i in range(5): newFile = File(makeUUID(), size = 1000, events = 100, locations = "somese.cern.ch") newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleSiteFileset.addFile(newFile) for i in range(5): newFile = File(makeUUID(), size = 1000, events = 100, locations = "otherse.cern.ch") newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleSiteFileset.addFile(newFile) self.multipleSiteFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset, workflow = testWorkflow, split_algo = "EndOfRun", type = "Processing") self.singleFileSubscription = Subscription(fileset = self.singleFileFileset, workflow = testWorkflow, split_algo = "EndOfRun", type = "Processing") self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset, workflow = testWorkflow, split_algo = "EndOfRun", type = "Processing") self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset, workflow = testWorkflow, split_algo = "EndOfRun", type = "Processing") self.multipleSiteSubscription = Subscription(fileset = self.multipleSiteFileset, workflow = testWorkflow, split_algo = "EndOfRun", type = "Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() self.multipleSiteSubscription.create() return def tearDown(self): """ _tearDown_ Nothing to do... """ myThread = threading.currentThread() if myThread.transaction == None: myThread.transaction = Transaction(self.dbi) myThread.transaction.begin() factory = WMFactory("WMBS", "WMCore.WMBS") destroy = factory.loadObject(myThread.dialect + ".Destroy") destroyworked = destroy.execute(conn = myThread.transaction.conn) if not destroyworked: raise Exception("Could not complete WMBS tear down.") myThread.transaction.commit() return def testNone(self): """ _testNone_ since the subscriptions are open, we shouldn't get any jobs back """ splitter = SplitterFactory() jobFactory = splitter(package = "WMCore.WMBS", subscription = self.singleFileSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") return def testClosed(self): """ _testClosed_ since the subscriptions are closed and none of the files have been acquired, all of the files should show up """ splitter = SplitterFactory() self.singleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(package = "WMCore.WMBS", subscription = self.singleFileSubscription) jobGroups = jobFactory() assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory() self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) self.multipleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory() self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) #self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory() assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) def testAllAcquired(self): """ _testAllAcquired_ should all return no job groups """ splitter = SplitterFactory() self.singleFileSubscription.acquireFiles( self.singleFileSubscription.availableFiles()) jobFactory = splitter(package = "WMCore.WMBS", subscription = self.singleFileSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.acquireFiles( self.multipleFileSubscription.availableFiles()) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleLumiSubscription.acquireFiles( self.multipleLumiSubscription.availableFiles()) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.acquireFiles( self.singleLumiSubscription.availableFiles()) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") def testClosedSomeAcquired(self): """ _testClosedSomeAcquired_ since the subscriptions are closed and none of the files ahve been acquired, all of the files should show up """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) self.singleFileSubscription.acquireFiles( [self.singleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package = "WMCore.WMBS", subscription = self.singleFileSubscription) jobGroups = jobFactory() self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.getFileset().markOpen(False) self.multipleFileSubscription.acquireFiles( [self.multipleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package = "WMCore.WMBS", subscription =self.multipleFileSubscription) jobGroups = jobFactory() self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.multipleLumiSubscription.getFileset().markOpen(False) self.multipleLumiSubscription.acquireFiles( [self.multipleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory() self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.singleLumiSubscription.getFileset().markOpen(False) self.singleLumiSubscription.acquireFiles( [self.singleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory() self.assertEquals(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.assertEquals(len(myfiles), 9) def testClosed_MultipleJobs(self): """ _testClosed_MultipleJobs_ Should break subscription into jobs based on number of files """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(package = "WMCore.WMBS", subscription = self.multipleFileSubscription) jobGroups = jobFactory(files_per_job = 1) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 10) job = jobGroups[0].jobs[0] # We should have one file per job self.assertEqual(len(job['input_files']), 1) return def testClosed_MultipleSites(self): """ _testClosed_MultipleSites_ Test whether we can handle multiple sites """ splitter = SplitterFactory() self.multipleSiteSubscription.getFileset().markOpen(False) jobFactory = splitter(package = "WMCore.WMBS", subscription = self.multipleSiteSubscription) jobGroups = jobFactory(files_per_job = 10) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs), 2) for job in jobGroups[0].jobs: self.assertEqual(len(jobGroups[0].jobs[0]['input_files']), 5)
class FixedDelayTest(unittest.TestCase): def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False) myThread = threading.currentThread() daofactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daofactory(classname="Locations.New") locationAction.execute(siteName="site1", pnn="T2_CH_CERN") self.multipleFileFileset = Fileset(name="TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(i, *[45 + i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name="TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name="TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45 + i / 3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name="TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size=1000, events=100, locations=set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription( fileset=self.multipleFileFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.singleFileSubscription = Subscription( fileset=self.singleFileFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.multipleLumiSubscription = Subscription( fileset=self.multipleFileLumiset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.singleLumiSubscription = Subscription( fileset=self.singleLumiFileset, workflow=testWorkflow, split_algo="FixedDelay", type="Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() return def tearDown(self): """ _tearDown_ Nothing to do... """ self.testInit.clearDatabase() return def testNone(self): """ _testNone_ Since the subscriptions are open, we shouldn't get any jobs back """ splitter = SplitterFactory() jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=int(time.time()) * 2) self.assertEquals(jobGroups, [], "Should have returned a null set") return def testClosed(self): """ _testClosed_ Since the subscriptions are closed and none of the files have been acquired, all of the files should show up """ splitter = SplitterFactory() self.singleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs), 1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) self.multipleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1) self.assertEquals(len(jobGroups[0].jobs), 1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) #self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 10) def testAllAcquired(self): """ _testAllAcquired_ should all return no job groups """ splitter = SplitterFactory() self.singleFileSubscription.acquireFiles( self.singleFileSubscription.availableFiles()) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.acquireFiles( self.multipleFileSubscription.availableFiles()) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleLumiSubscription.acquireFiles( self.multipleLumiSubscription.availableFiles()) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.acquireFiles( self.singleLumiSubscription.availableFiles()) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") def testClosedSomeAcquired(self): """ _testClosedSomeAcquired_ since the subscriptions are closed and none of the files ahve been acquired, all of the files should show up """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) self.singleFileSubscription.acquireFiles( [self.singleFileSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.getFileset().markOpen(False) self.multipleFileSubscription.acquireFiles( [self.multipleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package="WMCore.WMBS", subscription=self.multipleFileSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.multipleLumiSubscription.getFileset().markOpen(False) self.multipleLumiSubscription.acquireFiles( [self.multipleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.singleLumiSubscription.getFileset().markOpen(False) self.singleLumiSubscription.acquireFiles( [self.singleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time=1) self.assertEquals(len(jobGroups), 1, "Should have gotten one jobGroup") self.assertEquals(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEquals(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.assertEquals(len(myfiles), 9)
def createTestJobGroup(self, nJobs=10, retry_count=1, workloadPath='test', fwjrPath=None, workloadName=makeUUID(), fileModifier=''): """ Creates a group of several jobs """ myThread = threading.currentThread() myThread.transaction.begin() testWorkflow = Workflow(spec=workloadPath, owner="cmsdataops", group="cmsdataops", name=workloadName, task="/TestWorkload/ReReco") testWorkflow.create() testWMBSFileset = Fileset(name="TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testFile0 = File(lfn="/this/is/a/parent%s" % fileModifier, size=1024, events=10) testFile0.addRun(Run(10, *[12312])) testFile0.setLocation('T2_CH_CERN') testFileA = File(lfn="/this/is/a/lfnA%s" % fileModifier, size=1024, events=10, first_event=88, merged=False) testFileA.addRun(Run(10, *[12312, 12313])) testFileA.setLocation('T2_CH_CERN') testFileB = File(lfn="/this/is/a/lfnB%s" % fileModifier, size=1024, events=10, first_event=88, merged=False) testFileB.addRun(Run(10, *[12314, 12315, 12316])) testFileB.setLocation('T2_CH_CERN') testFile0.create() testFileA.create() testFileB.create() testFileA.addParent(lfn="/this/is/a/parent%s" % fileModifier) testFileB.addParent(lfn="/this/is/a/parent%s" % fileModifier) for i in range(0, nJobs): testJob = Job(name=makeUUID()) testJob['retry_count'] = retry_count testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run=10, lumis=[12312]) testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316]) testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) testJob['fwjr_path'] = fwjrPath os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJob.create(group=testJobGroup) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob.save() testJobGroup.commit() testSubscription.acquireFiles(files=[testFileA, testFileB]) testSubscription.save() myThread.transaction.commit() return testJobGroup
class FixedDelayTest(unittest.TestCase): def setUp(self): """ _setUp_ Create two subscriptions: One that contains a single file and one that contains multiple files. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", pnn = "T2_CH_CERN") self.multipleFileFileset = Fileset(name = "TestFileset1") self.multipleFileFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(i, *[45+i])) newFile.create() self.multipleFileFileset.addFile(newFile) self.multipleFileFileset.commit() self.singleFileFileset = Fileset(name = "TestFileset2") self.singleFileFileset.create() newFile = File("/some/file/name", size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleFileFileset.addFile(newFile) self.singleFileFileset.commit() self.multipleFileLumiset = Fileset(name = "TestFileset3") self.multipleFileLumiset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45+i/3])) newFile.create() self.multipleFileLumiset.addFile(newFile) self.multipleFileLumiset.commit() self.singleLumiFileset = Fileset(name = "TestFileset4") self.singleLumiFileset.create() for i in range(10): newFile = File(makeUUID(), size = 1000, events = 100, locations = set(["T2_CH_CERN"])) newFile.addRun(Run(1, *[45])) newFile.create() self.singleLumiFileset.addFile(newFile) self.singleLumiFileset.commit() testWorkflow = Workflow(spec = "spec.xml", owner = "mnorman", name = "wf001", task="Test") testWorkflow.create() self.multipleFileSubscription = Subscription(fileset = self.multipleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleFileSubscription = Subscription(fileset = self.singleFileFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleLumiSubscription = Subscription(fileset = self.multipleFileLumiset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.singleLumiSubscription = Subscription(fileset = self.singleLumiFileset, workflow = testWorkflow, split_algo = "FixedDelay", type = "Processing") self.multipleFileSubscription.create() self.singleFileSubscription.create() self.multipleLumiSubscription.create() self.singleLumiSubscription.create() return def tearDown(self): """ _tearDown_ Nothing to do... """ self.testInit.clearDatabase() return def testNone(self): """ _testNone_ Since the subscriptions are open, we shouldn't get any jobs back """ splitter = SplitterFactory() jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = int(time.time())*2) self.assertEqual(jobGroups, [], "Should have returned a null set") return def testClosed(self): """ _testClosed_ Since the subscriptions are closed and none of the files have been acquired, all of the files should show up """ splitter = SplitterFactory() self.singleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." job = jobGroups[0].jobs.pop() assert job.getFiles(type = "lfn") == ["/some/file/name"], \ "ERROR: Job contains unknown files." self.multipleFileSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) self.multipleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups), 1) self.assertEqual(len(jobGroups[0].jobs),1) myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) #self.assertEqual(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.getFileset().markOpen(False) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) assert len(jobGroups) == 1, \ "ERROR: JobFactory didn't return one JobGroup." assert len(jobGroups[0].jobs) == 1, \ "ERROR: JobFactory didn't create a single job." myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 10) def testAllAcquired(self): """ _testAllAcquired_ should all return no job groups """ splitter = SplitterFactory() self.singleFileSubscription.acquireFiles( self.singleFileSubscription.availableFiles()) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.acquireFiles( self.multipleFileSubscription.availableFiles()) jobFactory = splitter(self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleLumiSubscription.acquireFiles( self.multipleLumiSubscription.availableFiles()) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.singleLumiSubscription.acquireFiles( self.singleLumiSubscription.availableFiles()) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") def testClosedSomeAcquired(self): """ _testClosedSomeAcquired_ since the subscriptions are closed and none of the files ahve been acquired, all of the files should show up """ splitter = SplitterFactory() self.multipleFileSubscription.getFileset().markOpen(False) self.singleFileSubscription.acquireFiles( [self.singleFileSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(jobGroups, [], "Should have returned a null set") self.multipleFileSubscription.getFileset().markOpen(False) self.multipleFileSubscription.acquireFiles( [self.multipleFileSubscription.availableFiles().pop()]) jobFactory = splitter(package = "WMCore.WMBS", subscription =self.multipleFileSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.multipleLumiSubscription.getFileset().markOpen(False) self.multipleLumiSubscription.acquireFiles( [self.multipleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.multipleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.singleLumiSubscription.getFileset().markOpen(False) self.singleLumiSubscription.acquireFiles( [self.singleLumiSubscription.availableFiles().pop()]) jobFactory = splitter(self.singleLumiSubscription) jobGroups = jobFactory(trigger_time = 1) self.assertEqual(len(jobGroups),1, "Should have gotten one jobGroup") self.assertEqual(len(jobGroups[0].jobs), 1, \ "JobFactory should have made one job") myfiles = jobGroups[0].jobs[0].getFiles() self.assertEqual(len(myfiles), 9, \ "JobFactory should have provides us with 9 files") self.assertEqual(len(myfiles), 9)
def createTestJobGroup(self, nJobs = 10, retry_count = 1, workloadPath = 'test', fwjrPath = None, workloadName = makeUUID()): """ Creates a group of several jobs """ myThread = threading.currentThread() myThread.transaction.begin() testWorkflow = Workflow(spec = workloadPath, owner = "cmsdataops", group = "cmsdataops", name = workloadName, task="/TestWorkload/ReReco") testWorkflow.create() testWMBSFileset = Fileset(name = "TestFileset") testWMBSFileset.create() testSubscription = Subscription(fileset = testWMBSFileset, workflow = testWorkflow) testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFile0 = File(lfn = "/this/is/a/parent", size = 1024, events = 10) testFile0.addRun(Run(10, *[12312])) testFile0.setLocation('malpaquet') testFileA = File(lfn = "/this/is/a/lfnA", size = 1024, events = 10, first_event = 88, last_event = 99) testFileA.addRun(Run(10, *[12312, 12313])) testFileA.setLocation('malpaquet') testFileB = File(lfn = "/this/is/a/lfnB", size = 1024, events = 10, first_event = 88, last_event = 99) testFileB.addRun(Run(10, *[12314, 12315, 12316])) testFileB.setLocation('malpaquet') testFile0.create() testFileA.create() testFileB.create() testFileA.addParent(lfn = "/this/is/a/parent") testFileB.addParent(lfn = "/this/is/a/parent") for i in range(0, nJobs): testJob = Job(name = makeUUID()) testJob['retry_count'] = retry_count testJob['retry_max'] = 10 testJob['mask'].addRunAndLumis(run = 10, lumis = [12312]) testJob['mask'].addRunAndLumis(run = 10, lumis = [12314, 12316]) testJob['mask']['FirstEvent'] = 100 testJob['cache_dir'] = os.path.join(self.testDir, testJob['name']) testJob['fwjr_path'] = fwjrPath os.mkdir(testJob['cache_dir']) testJobGroup.add(testJob) testJob.create(group = testJobGroup) testJob.addFile(testFileA) testJob.addFile(testFileB) testJob.save() testJobGroup.commit() testSubscription.acquireFiles(files = [testFileA, testFileB]) testSubscription.save() myThread.transaction.commit() return testJobGroup
class ReportIntegrationTest(unittest.TestCase): """ _ReportIntegrationTest_ """ def setUp(self): """ _setUp_ Setup the database and WMBS for the test. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() self.daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.dbsfactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = self.daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", seName = "cmssrm.fnal.gov") inputFile = File(lfn = "/path/to/some/lfn", size = 10, events = 10, locations = "cmssrm.fnal.gov") inputFile.create() inputFileset = Fileset(name = "InputFileset") inputFileset.create() inputFileset.addFile(inputFile) inputFileset.commit() unmergedFileset = Fileset(name = "UnmergedFileset") unmergedFileset.create() mergedFileset = Fileset(name = "MergedFileset") mergedFileset.create() procWorkflow = Workflow(spec = "wf001.xml", owner = "Steve", name = "TestWF", task = "/TestWF/None") procWorkflow.create() procWorkflow.addOutput("outputRECORECO", unmergedFileset) mergeWorkflow = Workflow(spec = "wf002.xml", owner = "Steve", name = "MergeWF", task = "/MergeWF/None") mergeWorkflow.create() mergeWorkflow.addOutput("Merged", mergedFileset) insertWorkflow = self.dbsfactory(classname = "InsertWorkflow") insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0) insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0) self.procSubscription = Subscription(fileset = inputFileset, workflow = procWorkflow, split_algo = "FileBased", type = "Processing") self.procSubscription.create() self.procSubscription.acquireFiles() self.mergeSubscription = Subscription(fileset = unmergedFileset, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize", type = "Merge") self.mergeSubscription.create() self.procJobGroup = JobGroup(subscription = self.procSubscription) self.procJobGroup.create() self.mergeJobGroup = JobGroup(subscription = self.mergeSubscription) self.mergeJobGroup.create() self.testJob = Job(name = "testJob", files = [inputFile]) self.testJob.create(group = self.procJobGroup) self.testJob["state"] = "complete" myThread = threading.currentThread() self.daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.stateChangeAction = self.daofactory(classname = "Jobs.ChangeState") self.setFWJRAction = self.daofactory(classname = "Jobs.SetFWJRPath") self.getJobTypeAction = self.daofactory(classname = "Jobs.GetType") locationAction = self.daofactory(classname = "Locations.New") locationAction.execute(siteName = "cmssrm.fnal.gov") self.stateChangeAction.execute(jobs = [self.testJob]) self.tempDir = tempfile.mkdtemp() return def tearDown(self): """ _tearDown_ Clear out the database and the pickled report file. """ self.testInit.clearDatabase() try: os.remove(os.path.join(self.tempDir, "ProcReport.pkl")) os.remove(os.path.join(self.tempDir, "MergeReport.pkl")) except Exception as ex: pass try: os.rmdir(self.tempDir) except Exception as ex: pass return def createConfig(self, workerThreads): """ _createConfig_ Create a config for the JobAccountant with the given number of worker threads. This config needs to include information for connecting to the database as the component will create it's own database connections. These parameters are still pulled from the environment. """ config = self.testInit.getConfiguration() self.testInit.generateWorkDir(config) config.section_("JobStateMachine") config.JobStateMachine.couchurl = os.getenv("COUCHURL") config.JobStateMachine.couchDBName = "report_integration_t" config.JobStateMachine.jobSummaryDBName = "report_integration_wmagent_summary_t" config.component_("JobAccountant") config.JobAccountant.pollInterval = 60 config.JobAccountant.workerThreads = workerThreads config.JobAccountant.componentDir = os.getcwd() config.JobAccountant.logLevel = 'SQLDEBUG' config.component_("TaskArchiver") config.TaskArchiver.localWMStatsURL = "%s/%s" % (config.JobStateMachine.couchurl, config.JobStateMachine.jobSummaryDBName) return config def verifyJobSuccess(self, jobID): """ _verifyJobSuccess_ Verify that the metadata for a successful job is correct. This will check the outcome, retry count and state. """ testJob = Job(id = jobID) testJob.load() assert testJob["state"] == "success", \ "Error: test job in wrong state: %s" % testJob["state"] assert testJob["retry_count"] == 0, \ "Error: test job has wrong retry count: %s" % testJob["retry_count"] assert testJob["outcome"] == "success", \ "Error: test job has wrong outcome: %s" % testJob["outcome"] return def verifyFileMetaData(self, jobID, fwkJobReportFiles): """ _verifyFileMetaData_ Verify that all the files that were output by a job made it into WMBS correctly. Compare the contents of WMBS to the files in the frameworks job report. Note that fwkJobReportFiles is a list of DataStructs File objects. """ testJob = Job(id = jobID) testJob.loadData() inputLFNs = [] for inputFile in testJob["input_files"]: inputLFNs.append(inputFile["lfn"]) for fwkJobReportFile in fwkJobReportFiles: outputFile = File(lfn = fwkJobReportFile["lfn"]) outputFile.loadData(parentage = 1) assert outputFile["events"] == int(fwkJobReportFile["events"]), \ "Error: Output file has wrong events: %s, %s" % \ (outputFile["events"], fwkJobReportFile["events"]) assert outputFile["size"] == int(fwkJobReportFile["size"]), \ "Error: Output file has wrong size: %s, %s" % \ (outputFile["size"], fwkJobReportFile["size"]) for ckType in fwkJobReportFile["checksums"].keys(): assert ckType in outputFile["checksums"].keys(), \ "Error: Output file is missing checksums: %s" % ckType assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \ "Error: Checksums don't match." assert len(fwkJobReportFile["checksums"].keys()) == \ len(outputFile["checksums"].keys()), \ "Error: Wrong number of checksums." jobType = self.getJobTypeAction.execute(jobID = jobID) if jobType == "Merge": assert str(outputFile["merged"]) == "True", \ "Error: Merge jobs should output merged files." else: assert outputFile["merged"] == fwkJobReportFile["merged"], \ "Error: Output file merged output is wrong: %s, %s" % \ (outputFile["merged"], fwkJobReportFile["merged"]) assert len(outputFile["locations"]) == 1, \ "Error: outputfile should have one location: %s" % outputFile["locations"] assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \ "Error: wrong location for file." assert len(outputFile["parents"]) == len(inputLFNs), \ "Error: Output file has wrong number of parents." for outputParent in outputFile["parents"]: assert outputParent["lfn"] in inputLFNs, \ "Error: Unknown parent file: %s" % outputParent["lfn"] fwjrRuns = {} for run in fwkJobReportFile["runs"]: fwjrRuns[run.run] = run.lumis for run in outputFile["runs"]: assert run.run in fwjrRuns, \ "Error: Extra run in output: %s" % run.run for lumi in run: assert lumi in fwjrRuns[run.run], \ "Error: Extra lumi: %s" % lumi fwjrRuns[run.run].remove(lumi) if len(fwjrRuns[run.run]) == 0: del fwjrRuns[run.run] assert len(fwjrRuns.keys()) == 0, \ "Error: Missing runs, lumis: %s" % fwjrRuns testJobGroup = JobGroup(id = testJob["jobgroup"]) testJobGroup.loadData() jobGroupFileset = testJobGroup.output jobGroupFileset.loadData() assert outputFile["id"] in jobGroupFileset.getFiles(type = "id"), \ "Error: output file not in jobgroup fileset." if testJob["mask"]["FirstEvent"] == None: assert outputFile["first_event"] == 0, \ "Error: first event not set correctly: 0, %s" % \ outputFile["first_event"] else: assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \ "Error: last event not set correctly: %s, %s" % \ (testJob["mask"]["FirstEvent"], outputFile["first_event"]) return def testReportHandling(self): """ _testReportHandling_ Verify that we're able to parse a CMSSW report, convert it to a Report() style report, pickle it and then have the accountant process it. """ self.procPath = os.path.join(WMCore.WMBase.getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml") myReport = Report("cmsRun1") myReport.parse(self.procPath) # Fake some metadata that should be added by the stageout scripts. for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"): fileRef.size = 1024 fileRef.location = "cmssrm.fnal.gov" fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl") cmsRunStep = myReport.retrieveStep("cmsRun1") cmsRunStep.status = 0 myReport.setTaskName('/TestWF/None') myReport.persist(fwjrPath) self.setFWJRAction.execute(jobID = self.testJob["id"], fwjrPath = fwjrPath) pFile = DBSBufferFile(lfn = "/path/to/some/lfn", size = 600000, events = 60000) pFile.setAlgorithm(appName = "cmsRun", appVer = "UNKNOWN", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") pFile.setDatasetPath("/bogus/dataset/path") #pFile.addRun(Run(1, *[45])) pFile.create() config = self.createConfig(workerThreads = 1) accountant = JobAccountantPoller(config) accountant.setup() accountant.algorithm() self.verifyJobSuccess(self.testJob["id"]) self.verifyFileMetaData(self.testJob["id"], myReport.getAllFilesFromStep("cmsRun1")) inputFile = File(lfn = "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root") inputFile.load() self.testMergeJob = Job(name = "testMergeJob", files = [inputFile]) self.testMergeJob.create(group = self.mergeJobGroup) self.testMergeJob["state"] = "complete" self.stateChangeAction.execute(jobs = [self.testMergeJob]) self.mergePath = os.path.join(WMCore.WMBase.getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml") myReport = Report("mergeReco") myReport.parse(self.mergePath) # Fake some metadata that should be added by the stageout scripts. for fileRef in myReport.getAllFileRefsFromStep("mergeReco"): fileRef.size = 1024 fileRef.location = "cmssrm.fnal.gov" fileRef.dataset = {"applicationName": "cmsRun", "applicationVersion": "CMSSW_3_4_2_patch1", "primaryDataset": "MinimumBias", "processedDataset": "Rereco-v1", "dataTier": "RECO"} fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl") myReport.setTaskName('/MergeWF/None') cmsRunStep = myReport.retrieveStep("mergeReco") cmsRunStep.status = 0 myReport.persist(fwjrPath) self.setFWJRAction.execute(jobID = self.testMergeJob["id"], fwjrPath = fwjrPath) accountant.algorithm() self.verifyJobSuccess(self.testMergeJob["id"]) self.verifyFileMetaData(self.testMergeJob["id"], myReport.getAllFilesFromStep("mergeReco")) return
def testParentageByMergeJob(self): """ _testParentageByJob_ Tests the DAO that assigns parentage by Job """ testWorkflow = Workflow(spec = 'hello', owner = "mnorman", name = "wf001", task="basicWorkload/Production") testWorkflow.create() testFileset = Fileset(name = "TestFileset") testFileset.create() testSubscription = Subscription(fileset = testFileset, workflow = testWorkflow, type = "Processing", split_algo = "FileBased") testSubscription.create() testJobGroup = JobGroup(subscription = testSubscription) testJobGroup.create() testFileParentA = File(lfn = "/this/is/a/parent/lfnA", size = 1024, events = 20, checksums = {'cksum': 1}, locations = set(['T2_CH_CERN', 'T1_US_FNAL_Disk'])) testFileParentA.addRun(Run( 1, *[45])) testFileParentB = File(lfn = "/this/is/a/parent/lfnB", size = 1024, events = 20, checksums = {'cksum': 1}, locations = set(['T2_CH_CERN', 'T1_US_FNAL_Disk'])) testFileParentB.addRun(Run( 1, *[45])) testFileParentA.create() testFileParentB.create() testFileset.addFile(testFileParentA) testFileset.addFile(testFileParentB) testFileset.commit() testFileA = File(lfn = "/this/is/a/lfn", size = 1024, events = 10, checksums = {'cksum':1}) testFileA.addRun(Run( 1, *[45])) testFileA.create() testJobA = Job() testJobA["outcome"] = 'success' testJobA.create(group = testJobGroup) testJobA.addFile(testFileParentA) testJobA.addFile(testFileParentB) testJobA.associateFiles() testSubscription.acquireFiles() testJobA.completeInputFiles() parentAction = self.daofactory(classname = "Files.SetParentageByMergeJob") parentAction.execute(binds = {'jobid': testJobA.exists(), 'child': testFileA['lfn']}) testFileB = File(id = testFileA["id"]) testFileB.loadData(parentage = 1) goldenFiles = [testFileParentA, testFileParentB] for parentFile in testFileB["parents"]: self.assertEqual(parentFile in goldenFiles, True, "ERROR: Unknown parent file") goldenFiles.remove(parentFile) self.assertEqual(len(goldenFiles), 0, "ERROR: Some parents are missing") testFileC = File(lfn = "/this/is/c/lfn", size = 1024, events = 10, checksums = {'cksum':1}) testFileC.addRun(Run( 1, *[46])) testFileC.create() testJobC = Job() testJobC["outcome"] = 'failure' testJobC.create(group = testJobGroup) testJobC.addFile(testFileParentA) testJobC.associateFiles() testSubscription.acquireFiles() testJobC.failInputFiles() parentAction.execute(binds = {'jobid': testJobC.exists(), 'child': testFileC['lfn']}) testFileB = File(id = testFileA["id"]) testFileB.loadData(parentage = 1) goldenFiles = [testFileParentA] for parentFile in testFileB["parents"]: self.assertEqual(parentFile in goldenFiles, False, "ERROR: Unknown parent file") testFileC_1 = File(id = testFileC["id"]) testFileC_1.loadData(parentage = 1) goldenFiles = [testFileParentA] for parentFile in testFileC_1["parents"]: self.assertEqual(parentFile in goldenFiles, False, "ERROR: Unknown parent file")
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBSBuffer.Database", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault = False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = threading.currentThread().logger, dbinterface = threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI = None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = daoFactory(classname = "Locations.New") changeStateAction = daoFactory(classname = "Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000) inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations = "goodse.cern.ch") inputFileB = File("lfnB", locations = "goodse.cern.ch") inputFileC = File("lfnC", locations = "goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations = "goodse.cern.ch") unmergedFileB = File("ulfnB", locations = "goodse.cern.ch") unmergedFileC = File("ulfnC", locations = "goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset = inputFileset, workflow = mainProcWorkflow, type = "Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription = self.mainProcSub) procJobGroup.create() self.procJobA = Job(name = "ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name = "ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name = "ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset = unmergedOutputFileset, workflow = mainProcMergeWorkflow, type = "Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription = self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name = "MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name = "MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name = "MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset, workflow = mainCleanupWorkflow, type = "Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name = "CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name = "CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name = "CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = 'Steve' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs = jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec = "spec2", owner = "Steve", name = "Bogus", task = "Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset = bogusFileset, workflow = bogusWorkflow, type = "Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. """ failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. """ self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config = config) # Create nine jobs self.setupForKillTest(baAPI = baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job = 1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1, max_merge_size = 2, max_merge_events = 3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBase", files_per_job = 50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1, include_parents = True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) return testWorkload def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir) testWMBSHelper.createSubscription() testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir) testResubmitWMBSHelper.createSubscription() mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() def createWMSpec(self, name = 'ReRecoWorkload'): wmspec = rerecoWorkload(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") return wmspec def createMCWMSpec(self, name = 'MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents = 10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask = None): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, block, mask, cachepath = self.workDir) if block: block = self.dbs.getFileBlock(block)[block] wmbs.createSubscriptionAndAddFiles(block = block) return wmbs # def testProduction(self): # """Production workflow""" # pass def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testParentage(self): """ TODO: add the parentage test. 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ pass def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname = "Locations.New") ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName = site, seName = self.siteDB.cmsNametoSE(site)) ses.append(self.siteDB.cmsNametoSE(site)) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
def testGetOutputMapDAO(self): """ _testGetOutputMapDAO_ Verify the proper behavior of the GetOutputMapDAO for a variety of different processing chains. """ recoOutputFileset = Fileset(name="RECO") recoOutputFileset.create() mergedRecoOutputFileset = Fileset(name="MergedRECO") mergedRecoOutputFileset.create() alcaOutputFileset = Fileset(name="ALCA") alcaOutputFileset.create() mergedAlcaOutputFileset = Fileset(name="MergedALCA") mergedAlcaOutputFileset.create() dqmOutputFileset = Fileset(name="DQM") dqmOutputFileset.create() mergedDqmOutputFileset = Fileset(name="MergedDQM") mergedDqmOutputFileset.create() cleanupFileset = Fileset(name="Cleanup") cleanupFileset.create() testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="None") testWorkflow.create() testWorkflow.addOutput("output", recoOutputFileset, mergedRecoOutputFileset) testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset, mergedAlcaOutputFileset) testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset) testWorkflow.addOutput("output", cleanupFileset) testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset) testWorkflow.addOutput("DQM", cleanupFileset) testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve", name="TestRecoMergeWF", task="None") testRecoMergeWorkflow.create() testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset, mergedRecoOutputFileset) testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve", name="TestRecoProcWF", task="None") testRecoProcWorkflow.create() testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve", name="TestAlcaChildWF", task="None") testAlcaChildWorkflow.create() inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000, locations="cmssrm.fnal.gov") inputFile.create() testFileset = Fileset(name="TestFileset") testFileset.create() testFileset.addFile(inputFile) testFileset.commit() testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow, split_algo="EventBased", type="Processing") testMergeRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoMergeWorkflow, split_algo="WMBSMergeBySize", type="Merge") testProcRecoSubscription = Subscription(fileset=recoOutputFileset, workflow=testRecoProcWorkflow, split_algo="FileBased", type="Processing") testChildAlcaSubscription = Subscription(fileset=alcaOutputFileset, workflow=testAlcaChildWorkflow, split_algo="FileBased", type="Processing") testSubscription.create() testMergeRecoSubscription.create() testProcRecoSubscription.create() testChildAlcaSubscription.create() testSubscription.acquireFiles() testJobGroup = JobGroup(subscription=testSubscription) testJobGroup.create() testJob = Job(name="SplitJobA", files=[inputFile]) testJob.create(group=testJobGroup) testJob["state"] = "complete" testJob.save() outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap") outputMap = outputMapAction.execute(jobID=testJob["id"]) assert len(outputMap.keys()) == 3, \ "Error: Wrong number of outputs for primary workflow." goldenMap = {"output": (recoOutputFileset.id, mergedRecoOutputFileset.id), "ALCARECOStreamCombined": (alcaOutputFileset.id, mergedAlcaOutputFileset.id), "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id)} for outputID in outputMap.keys(): for outputFilesets in outputMap[outputID]: if outputFilesets["merged_output_fileset"] is None: self.assertEqual(outputFilesets["output_fileset"], cleanupFileset.id, "Error: Cleanup fileset is wrong.") continue self.assertTrue(outputID in goldenMap.keys(), "Error: Output identifier is missing.") self.assertEqual(outputFilesets["output_fileset"], goldenMap[outputID][0], "Error: Output fileset is wrong.") self.assertEqual(outputFilesets["merged_output_fileset"], goldenMap[outputID][1], "Error: Merged output fileset is wrong.") del goldenMap[outputID] self.assertEqual(len(goldenMap.keys()), 0, "Error: Missing output maps.") return
class ReportIntegrationTest(unittest.TestCase): """ _ReportIntegrationTest_ """ def setUp(self): """ _setUp_ Setup the database and WMBS for the test. """ self.testInit = TestInit(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "WMCore.WMBS"], useDefault = False) myThread = threading.currentThread() self.daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.dbsfactory = DAOFactory(package = "WMComponent.DBS3Buffer", logger = myThread.logger, dbinterface = myThread.dbi) locationAction = self.daofactory(classname = "Locations.New") locationAction.execute(siteName = "site1", pnn = "T1_US_FNAL_Disk") inputFile = File(lfn = "/path/to/some/lfn", size = 10, events = 10, locations = "T1_US_FNAL_Disk") inputFile.create() inputFileset = Fileset(name = "InputFileset") inputFileset.create() inputFileset.addFile(inputFile) inputFileset.commit() unmergedFileset = Fileset(name = "UnmergedFileset") unmergedFileset.create() mergedFileset = Fileset(name = "MergedFileset") mergedFileset.create() procWorkflow = Workflow(spec = "wf001.xml", owner = "Steve", name = "TestWF", task = "/TestWF/None") procWorkflow.create() procWorkflow.addOutput("outputRECORECO", unmergedFileset) mergeWorkflow = Workflow(spec = "wf002.xml", owner = "Steve", name = "MergeWF", task = "/MergeWF/None") mergeWorkflow.create() mergeWorkflow.addOutput("Merged", mergedFileset) insertWorkflow = self.dbsfactory(classname = "InsertWorkflow") insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0) insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0) self.procSubscription = Subscription(fileset = inputFileset, workflow = procWorkflow, split_algo = "FileBased", type = "Processing") self.procSubscription.create() self.procSubscription.acquireFiles() self.mergeSubscription = Subscription(fileset = unmergedFileset, workflow = mergeWorkflow, split_algo = "WMBSMergeBySize", type = "Merge") self.mergeSubscription.create() self.procJobGroup = JobGroup(subscription = self.procSubscription) self.procJobGroup.create() self.mergeJobGroup = JobGroup(subscription = self.mergeSubscription) self.mergeJobGroup.create() self.testJob = Job(name = "testJob", files = [inputFile]) self.testJob.create(group = self.procJobGroup) self.testJob["state"] = "complete" myThread = threading.currentThread() self.daofactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) self.stateChangeAction = self.daofactory(classname = "Jobs.ChangeState") self.setFWJRAction = self.daofactory(classname = "Jobs.SetFWJRPath") self.getJobTypeAction = self.daofactory(classname = "Jobs.GetType") locationAction = self.daofactory(classname = "Locations.New") locationAction.execute(siteName = "cmssrm.fnal.gov") self.stateChangeAction.execute(jobs = [self.testJob]) self.tempDir = tempfile.mkdtemp() return def tearDown(self): """ _tearDown_ Clear out the database and the pickled report file. """ self.testInit.clearDatabase() try: os.remove(os.path.join(self.tempDir, "ProcReport.pkl")) os.remove(os.path.join(self.tempDir, "MergeReport.pkl")) except Exception as ex: pass try: os.rmdir(self.tempDir) except Exception as ex: pass return def createConfig(self, workerThreads): """ _createConfig_ Create a config for the JobAccountant with the given number of worker threads. This config needs to include information for connecting to the database as the component will create it's own database connections. These parameters are still pulled from the environment. """ config = self.testInit.getConfiguration() self.testInit.generateWorkDir(config) config.section_("JobStateMachine") config.JobStateMachine.couchurl = os.getenv("COUCHURL") config.JobStateMachine.couchDBName = "report_integration_t" config.JobStateMachine.jobSummaryDBName = "report_integration_wmagent_summary_t" config.component_("JobAccountant") config.JobAccountant.pollInterval = 60 config.JobAccountant.workerThreads = workerThreads config.JobAccountant.componentDir = os.getcwd() config.JobAccountant.logLevel = 'SQLDEBUG' config.component_("TaskArchiver") config.TaskArchiver.localWMStatsURL = "%s/%s" % (config.JobStateMachine.couchurl, config.JobStateMachine.jobSummaryDBName) return config def verifyJobSuccess(self, jobID): """ _verifyJobSuccess_ Verify that the metadata for a successful job is correct. This will check the outcome, retry count and state. """ testJob = Job(id = jobID) testJob.load() assert testJob["state"] == "success", \ "Error: test job in wrong state: %s" % testJob["state"] assert testJob["retry_count"] == 0, \ "Error: test job has wrong retry count: %s" % testJob["retry_count"] assert testJob["outcome"] == "success", \ "Error: test job has wrong outcome: %s" % testJob["outcome"] return def verifyFileMetaData(self, jobID, fwkJobReportFiles): """ _verifyFileMetaData_ Verify that all the files that were output by a job made it into WMBS correctly. Compare the contents of WMBS to the files in the frameworks job report. Note that fwkJobReportFiles is a list of DataStructs File objects. """ testJob = Job(id = jobID) testJob.loadData() inputLFNs = [] for inputFile in testJob["input_files"]: inputLFNs.append(inputFile["lfn"]) for fwkJobReportFile in fwkJobReportFiles: outputFile = File(lfn = fwkJobReportFile["lfn"]) outputFile.loadData(parentage = 1) assert outputFile["events"] == int(fwkJobReportFile["events"]), \ "Error: Output file has wrong events: %s, %s" % \ (outputFile["events"], fwkJobReportFile["events"]) assert outputFile["size"] == int(fwkJobReportFile["size"]), \ "Error: Output file has wrong size: %s, %s" % \ (outputFile["size"], fwkJobReportFile["size"]) for ckType in fwkJobReportFile["checksums"]: assert ckType in outputFile["checksums"], \ "Error: Output file is missing checksums: %s" % ckType assert outputFile["checksums"][ckType] == fwkJobReportFile["checksums"][ckType], \ "Error: Checksums don't match." assert len(fwkJobReportFile["checksums"]) == \ len(outputFile["checksums"]), \ "Error: Wrong number of checksums." jobType = self.getJobTypeAction.execute(jobID = jobID) if jobType == "Merge": assert str(outputFile["merged"]) == "True", \ "Error: Merge jobs should output merged files." else: assert outputFile["merged"] == fwkJobReportFile["merged"], \ "Error: Output file merged output is wrong: %s, %s" % \ (outputFile["merged"], fwkJobReportFile["merged"]) assert len(outputFile["locations"]) == 1, \ "Error: outputfile should have one location: %s" % outputFile["locations"] assert list(outputFile["locations"])[0] == list(fwkJobReportFile["locations"])[0], \ "Error: wrong location for file." assert len(outputFile["parents"]) == len(inputLFNs), \ "Error: Output file has wrong number of parents." for outputParent in outputFile["parents"]: assert outputParent["lfn"] in inputLFNs, \ "Error: Unknown parent file: %s" % outputParent["lfn"] fwjrRuns = {} for run in fwkJobReportFile["runs"]: fwjrRuns[run.run] = run.lumis for run in outputFile["runs"]: assert run.run in fwjrRuns, \ "Error: Extra run in output: %s" % run.run for lumi in run: assert lumi in fwjrRuns[run.run], \ "Error: Extra lumi: %s" % lumi fwjrRuns[run.run].remove(lumi) if len(fwjrRuns[run.run]) == 0: del fwjrRuns[run.run] assert len(fwjrRuns) == 0, \ "Error: Missing runs, lumis: %s" % fwjrRuns testJobGroup = JobGroup(id = testJob["jobgroup"]) testJobGroup.loadData() jobGroupFileset = testJobGroup.output jobGroupFileset.loadData() assert outputFile["id"] in jobGroupFileset.getFiles(type = "id"), \ "Error: output file not in jobgroup fileset." if testJob["mask"]["FirstEvent"] == None: assert outputFile["first_event"] == 0, \ "Error: first event not set correctly: 0, %s" % \ outputFile["first_event"] else: assert testJob["mask"]["FirstEvent"] == outputFile["first_event"], \ "Error: last event not set correctly: %s, %s" % \ (testJob["mask"]["FirstEvent"], outputFile["first_event"]) return def testReportHandling(self): """ _testReportHandling_ Verify that we're able to parse a CMSSW report, convert it to a Report() style report, pickle it and then have the accountant process it. """ self.procPath = os.path.join(WMCore.WMBase.getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWProcessingReport.xml") myReport = Report("cmsRun1") myReport.parse(self.procPath) # Fake some metadata that should be added by the stageout scripts. for fileRef in myReport.getAllFileRefsFromStep("cmsRun1"): fileRef.size = 1024 fileRef.location = "cmssrm.fnal.gov" fwjrPath = os.path.join(self.tempDir, "ProcReport.pkl") cmsRunStep = myReport.retrieveStep("cmsRun1") cmsRunStep.status = 0 myReport.setTaskName('/TestWF/None') myReport.persist(fwjrPath) self.setFWJRAction.execute(jobID = self.testJob["id"], fwjrPath = fwjrPath) pFile = DBSBufferFile(lfn = "/path/to/some/lfn", size = 600000, events = 60000) pFile.setAlgorithm(appName = "cmsRun", appVer = "UNKNOWN", appFam = "RECO", psetHash = "GIBBERISH", configContent = "MOREGIBBERISH") pFile.setDatasetPath("/bogus/dataset/path") #pFile.addRun(Run(1, *[45])) pFile.create() config = self.createConfig(workerThreads = 1) accountant = JobAccountantPoller(config) accountant.setup() accountant.algorithm() self.verifyJobSuccess(self.testJob["id"]) self.verifyFileMetaData(self.testJob["id"], myReport.getAllFilesFromStep("cmsRun1")) inputFile = File(lfn = "/store/backfill/2/unmerged/WMAgentCommissioining10/MinimumBias/RECO/rereco_GR09_R_34X_V5_All_v1/0000/outputRECORECO.root") inputFile.load() self.testMergeJob = Job(name = "testMergeJob", files = [inputFile]) self.testMergeJob.create(group = self.mergeJobGroup) self.testMergeJob["state"] = "complete" self.stateChangeAction.execute(jobs = [self.testMergeJob]) self.mergePath = os.path.join(WMCore.WMBase.getTestBase(), "WMCore_t/FwkJobReport_t/CMSSWMergeReport.xml") myReport = Report("mergeReco") myReport.parse(self.mergePath) # Fake some metadata that should be added by the stageout scripts. for fileRef in myReport.getAllFileRefsFromStep("mergeReco"): fileRef.size = 1024 fileRef.location = "cmssrm.fnal.gov" fileRef.dataset = {"applicationName": "cmsRun", "applicationVersion": "CMSSW_3_4_2_patch1", "primaryDataset": "MinimumBias", "processedDataset": "Rereco-v1", "dataTier": "RECO"} fwjrPath = os.path.join(self.tempDir, "MergeReport.pkl") myReport.setTaskName('/MergeWF/None') cmsRunStep = myReport.retrieveStep("mergeReco") cmsRunStep.status = 0 myReport.persist(fwjrPath) self.setFWJRAction.execute(jobID = self.testMergeJob["id"], fwjrPath = fwjrPath) accountant.algorithm() self.verifyJobSuccess(self.testMergeJob["id"]) self.verifyFileMetaData(self.testMergeJob["id"], myReport.getAllFilesFromStep("mergeReco")) return
class WMBSHelperTest(unittest.TestCase): def setUp(self): """ _setUp_ """ self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection() self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules=[ "WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl" ], useDefault=False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = MockDBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package="WMCore.WMBS", logger=threading.currentThread().logger, dbinterface=threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() return def setupForKillTest(self, baAPI=None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package="WMCore.WMBS", logger=myThread.logger, dbinterface=myThread.dbi) locationAction = daoFactory(classname="Locations.New") changeStateAction = daoFactory(classname="Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname="Users.New") userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations="goodse.cern.ch") inputFileB = File("lfnB", locations="goodse.cern.ch") inputFileC = File("lfnC", locations="goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations="goodse.cern.ch") unmergedFileB = File("ulfnB", locations="goodse.cern.ch") unmergedFileC = File("ulfnC", locations="goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset=inputFileset, workflow=mainProcWorkflow, type="Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription=self.mainProcSub) procJobGroup.create() self.procJobA = Job(name="ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name="ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name="ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset=unmergedOutputFileset, workflow=mainProcMergeWorkflow, type="Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription=self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name="MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name="MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name="MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset, workflow=mainCleanupWorkflow, type="Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name="CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name="CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name="CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [ self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC ] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs=jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset=bogusFileset, workflow=bogusWorkflow, type="Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. """ failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual( list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. """ self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job=1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule="OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size=1, max_merge_size=2, max_merge_events=3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule="OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job=50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") cleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule="Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job=1, include_parents=True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset="bogusPrimary", processedDataset="bogusProcessed", dataTier="DataTierA", lfnBase="bogusUnmerged", mergedLFNBase="bogusMerged", filterName=None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = fakeSiteDB() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname="Locations.New") self.ses = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName=site, seName=self.siteDB.cmsNametoSE(site)[0]) self.ses.append(self.siteDB.cmsNametoSE(site)[0]) def createWMSpec(self, name='ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites=[], nonCustodialSites=[], autoApproveSites=[], priority="Low", custodialSubType="Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = MockDBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask=None, parentFlag=False, detail=False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath=self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block=block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config=config) # Create nine jobs self.setupForKillTest(baAPI=baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual( procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][ "output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual( mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch', ceName='site1', plugin="TestPlugin") resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch', ceName='site2', plugin="TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS( task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][ "output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow( name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual( skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][ "output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][ "output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual( mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual( unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual( mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual( unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset( name="ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#2" #add run blacklist self.topLevelTask.setInputRunBlacklist([1, 2, 3, 4]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#2" # Run Whitelist self.topLevelTask.setInputRunWhitelist([2]) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['1'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['1,1'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), GlobalParams.numOfFilesPerBlock()) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#1" self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = [ '123454321' ] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = [ '123,123' ] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#1" wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname="Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#1" wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) firstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ block = self.dataset + "#1" wmbs, sub, numFiles = self.createWMBSHelperWithTopTask( self.wmspec, block, parentFlag=False, detail=True) # file creation without parents self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # no parent per child self.assertEqual(len(child["parents"]), 0) wmbs, sub, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(GlobalParams.numOfFilesPerBlock(), numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: # one parent per child self.assertEqual(len(child["parents"]), 1) def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" self.setupMCWMSpec() mask = Mask(FirstRun=12, FirstLumi=1234, FirstEvent=12345, LastEvent=999995, LastLumi=12345, LastRun=12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) file = list(fileset.files)[0] self.assertEqual(file['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(file['merged'], False) # merged files get added to dbs self.assertEqual(len(file['parents']), 0) #file.loadData() self.assertEqual(sorted(file['locations']), sorted(self.ses)) self.assertEqual(len(file.getParentLFNs()), 0) self.assertEqual(len(file.getRuns()), 1) run = file.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)
class WMBSHelperTest(EmulatedUnitTestCase): def setUp(self): """ _setUp_ """ super(WMBSHelperTest, self).setUp() self.testInit = TestInitCouchApp(__file__) self.testInit.setLogging() self.testInit.setDatabaseConnection(destroyAllDatabase=True) self.testInit.setupCouch("wmbshelper_t/jobs", "JobDump") self.testInit.setupCouch("wmbshelper_t/fwjrs", "FWJRDump") self.testInit.setupCouch("config_test", "GroupUser", "ConfigCache") os.environ["COUCHDB"] = "wmbshelper_t" self.testInit.setSchema(customModules = ["WMCore.WMBS", "WMComponent.DBS3Buffer", "WMCore.BossAir", "WMCore.ResourceControl"], useDefault = False) self.workDir = self.testInit.generateWorkDir() self.wmspec = self.createWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = DBSReader(self.inputDataset.dbsurl) self.daoFactory = DAOFactory(package = "WMCore.WMBS", logger = threading.currentThread().logger, dbinterface = threading.currentThread().dbi) return def tearDown(self): """ _tearDown_ Clear out the database. """ self.testInit.clearDatabase() self.testInit.tearDownCouch() self.testInit.delWorkDir() super(WMBSHelperTest, self).tearDown() return def setupForKillTest(self, baAPI = None): """ _setupForKillTest_ Inject a workflow into WMBS that has a processing task, a merge task and a cleanup task. Inject files into the various tasks at various processing states (acquired, complete, available...). Also create jobs for each subscription in various states. """ myThread = threading.currentThread() daoFactory = DAOFactory(package = "WMCore.WMBS", logger = myThread.logger, dbinterface = myThread.dbi) dummyLocationAction = daoFactory(classname = "Locations.New") changeStateAction = daoFactory(classname = "Jobs.ChangeState") resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertThreshold(siteName = 'site1', taskType = 'Processing', \ maxSlots = 10000, pendingSlots = 10000) userDN = 'someDN' userAction = daoFactory(classname = "Users.New") userAction.execute(dn = userDN, group_name = 'DEFAULT', role_name = 'DEFAULT') inputFileset = Fileset("input") inputFileset.create() inputFileA = File("lfnA", locations = "goodse.cern.ch") inputFileB = File("lfnB", locations = "goodse.cern.ch") inputFileC = File("lfnC", locations = "goodse.cern.ch") inputFileA.create() inputFileB.create() inputFileC.create() inputFileset.addFile(inputFileA) inputFileset.addFile(inputFileB) inputFileset.addFile(inputFileC) inputFileset.commit() unmergedOutputFileset = Fileset("unmerged") unmergedOutputFileset.create() unmergedFileA = File("ulfnA", locations = "goodse.cern.ch") unmergedFileB = File("ulfnB", locations = "goodse.cern.ch") unmergedFileC = File("ulfnC", locations = "goodse.cern.ch") unmergedFileA.create() unmergedFileB.create() unmergedFileC.create() unmergedOutputFileset.addFile(unmergedFileA) unmergedOutputFileset.addFile(unmergedFileB) unmergedOutputFileset.addFile(unmergedFileC) unmergedOutputFileset.commit() mainProcWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Proc") mainProcWorkflow.create() mainProcMergeWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "ProcMerge") mainProcMergeWorkflow.create() mainCleanupWorkflow = Workflow(spec = "spec1", owner = "Steve", name = "Main", task = "Cleanup") mainCleanupWorkflow.create() self.mainProcSub = Subscription(fileset = inputFileset, workflow = mainProcWorkflow, type = "Processing") self.mainProcSub.create() self.mainProcSub.acquireFiles(inputFileA) self.mainProcSub.completeFiles(inputFileB) procJobGroup = JobGroup(subscription = self.mainProcSub) procJobGroup.create() self.procJobA = Job(name = "ProcJobA") self.procJobA["state"] = "new" self.procJobA["location"] = "site1" self.procJobB = Job(name = "ProcJobB") self.procJobB["state"] = "executing" self.procJobB["location"] = "site1" self.procJobC = Job(name = "ProcJobC") self.procJobC["state"] = "complete" self.procJobC["location"] = "site1" self.procJobA.create(procJobGroup) self.procJobB.create(procJobGroup) self.procJobC.create(procJobGroup) self.mainMergeSub = Subscription(fileset = unmergedOutputFileset, workflow = mainProcMergeWorkflow, type = "Merge") self.mainMergeSub.create() self.mainMergeSub.acquireFiles(unmergedFileA) self.mainMergeSub.failFiles(unmergedFileB) mergeJobGroup = JobGroup(subscription = self.mainMergeSub) mergeJobGroup.create() self.mergeJobA = Job(name = "MergeJobA") self.mergeJobA["state"] = "exhausted" self.mergeJobA["location"] = "site1" self.mergeJobB = Job(name = "MergeJobB") self.mergeJobB["state"] = "cleanout" self.mergeJobB["location"] = "site1" self.mergeJobC = Job(name = "MergeJobC") self.mergeJobC["state"] = "new" self.mergeJobC["location"] = "site1" self.mergeJobA.create(mergeJobGroup) self.mergeJobB.create(mergeJobGroup) self.mergeJobC.create(mergeJobGroup) self.mainCleanupSub = Subscription(fileset = unmergedOutputFileset, workflow = mainCleanupWorkflow, type = "Cleanup") self.mainCleanupSub.create() self.mainCleanupSub.acquireFiles(unmergedFileA) self.mainCleanupSub.completeFiles(unmergedFileB) cleanupJobGroup = JobGroup(subscription = self.mainCleanupSub) cleanupJobGroup.create() self.cleanupJobA = Job(name = "CleanupJobA") self.cleanupJobA["state"] = "new" self.cleanupJobA["location"] = "site1" self.cleanupJobB = Job(name = "CleanupJobB") self.cleanupJobB["state"] = "executing" self.cleanupJobB["location"] = "site1" self.cleanupJobC = Job(name = "CleanupJobC") self.cleanupJobC["state"] = "complete" self.cleanupJobC["location"] = "site1" self.cleanupJobA.create(cleanupJobGroup) self.cleanupJobB.create(cleanupJobGroup) self.cleanupJobC.create(cleanupJobGroup) jobList = [self.procJobA, self.procJobB, self.procJobC, self.mergeJobA, self.mergeJobB, self.mergeJobC, self.cleanupJobA, self.cleanupJobB, self.cleanupJobC] changeStateAction.execute(jobList) if baAPI: for job in jobList: job['plugin'] = 'TestPlugin' job['userdn'] = userDN job['usergroup'] = 'DEFAULT' job['userrole'] = 'DEFAULT' job['custom']['location'] = 'site1' baAPI.createNewJobs(wmbsJobs = jobList) # We'll create an unrelated workflow to verify that it isn't affected # by the killing code. bogusFileset = Fileset("dontkillme") bogusFileset.create() bogusFileA = File("bogus/lfnA", locations = "goodse.cern.ch") bogusFileA.create() bogusFileset.addFile(bogusFileA) bogusFileset.commit() bogusWorkflow = Workflow(spec = "spec2", owner = "Steve", name = "Bogus", task = "Proc") bogusWorkflow.create() self.bogusSub = Subscription(fileset = bogusFileset, workflow = bogusWorkflow, type = "Processing") self.bogusSub.create() self.bogusSub.acquireFiles(bogusFileA) return def verifyFileKillStatus(self): """ _verifyFileKillStatus_ Verify that all files were killed correctly. The status of files in Cleanup and LogCollect subscriptions isn't modified. Status of already completed and failed files is not modified. Also verify that the bogus subscription is untouched. """ failedFiles = self.mainProcSub.filesOfStatus("Failed") acquiredFiles = self.mainProcSub.filesOfStatus("Acquired") completedFiles = self.mainProcSub.filesOfStatus("Completed") availableFiles = self.mainProcSub.filesOfStatus("Available") bogusAcquiredFiles = self.bogusSub.filesOfStatus("Acquired") self.assertEqual(len(availableFiles), 0, \ "Error: There should be no available files.") self.assertEqual(len(acquiredFiles), 0, \ "Error: There should be no acquired files.") self.assertEqual(len(bogusAcquiredFiles), 1, \ "Error: There should be one acquired file.") self.assertEqual(len(completedFiles), 3, \ "Error: There should be only one completed file.") goldenLFNs = ["lfnA", "lfnB", "lfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra completed file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(failedFiles), 0, \ "Error: There should be no failed files.") self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainMergeSub.filesOfStatus("Failed") acquiredFiles = self.mainMergeSub.filesOfStatus("Acquired") completedFiles = self.mainMergeSub.filesOfStatus("Completed") availableFiles = self.mainMergeSub.filesOfStatus("Available") self.assertEqual(len(acquiredFiles), 0, \ "Error: Merge subscription should have 0 acq files.") self.assertEqual(len(availableFiles), 0, \ "Error: Merge subscription should have 0 avail files.") self.assertEqual(len(failedFiles), 1, \ "Error: Merge subscription should have 1 failed files.") self.assertEqual(list(failedFiles)[0]["lfn"], "ulfnB", "Error: Wrong failed file.") self.assertEqual(len(completedFiles), 2, \ "Error: Merge subscription should have 2 compl files.") goldenLFNs = ["ulfnA", "ulfnC"] for completedFile in completedFiles: self.assertTrue(completedFile["lfn"] in goldenLFNs, \ "Error: Extra complete file.") goldenLFNs.remove(completedFile["lfn"]) self.assertEqual(len(goldenLFNs), 0, \ "Error: Missing LFN") failedFiles = self.mainCleanupSub.filesOfStatus("Failed") acquiredFiles = self.mainCleanupSub.filesOfStatus("Acquired") completedFiles = self.mainCleanupSub.filesOfStatus("Completed") availableFiles = self.mainCleanupSub.filesOfStatus("Available") self.assertEqual(len(failedFiles), 0, \ "Error: Cleanup subscription should have 0 fai files.") self.assertEqual(len(acquiredFiles), 1, \ "Error: There should be only one acquired file.") self.assertEqual(list(acquiredFiles)[0]["lfn"], "ulfnA", \ "Error: Wrong acquired LFN.") self.assertEqual(len(completedFiles), 1, \ "Error: There should be only one completed file.") self.assertEqual(list(completedFiles)[0]["lfn"], "ulfnB", \ "Error: Wrong completed LFN.") self.assertEqual(len(availableFiles), 1, \ "Error: There should be only one available file.") self.assertEqual(list(availableFiles)[0]["lfn"], "ulfnC", \ "Error: Wrong completed LFN.") return def verifyJobKillStatus(self): """ _verifyJobKillStatus_ Verify that jobs are killed correctly. Jobs belonging to Cleanup and LogCollect subscriptions are not killed. The status of jobs that have already finished running is not changed. """ self.procJobA.load() self.procJobB.load() self.procJobC.load() self.assertEqual(self.procJobA["state"], "killed", \ "Error: Proc job A should be killed.") self.assertEqual(self.procJobB["state"], "killed", \ "Error: Proc job B should be killed.") self.assertEqual(self.procJobC["state"], "complete", \ "Error: Proc job C should be complete.") self.mergeJobA.load() self.mergeJobB.load() self.mergeJobC.load() self.assertEqual(self.mergeJobA["state"], "exhausted", \ "Error: Merge job A should be exhausted.") self.assertEqual(self.mergeJobB["state"], "cleanout", \ "Error: Merge job B should be cleanout.") self.assertEqual(self.mergeJobC["state"], "killed", \ "Error: Merge job C should be killed.") self.cleanupJobA.load() self.cleanupJobB.load() self.cleanupJobC.load() self.assertEqual(self.cleanupJobA["state"], "new", \ "Error: Cleanup job A should be new.") self.assertEqual(self.cleanupJobB["state"], "executing", \ "Error: Cleanup job B should be executing.") self.assertEqual(self.cleanupJobC["state"], "complete", \ "Error: Cleanup job C should be complete.") return def createTestWMSpec(self): """ _createTestWMSpec_ Create a WMSpec that has a processing, merge, cleanup and skims tasks that can be used by the subscription creation test. """ testWorkload = WMWorkloadHelper(WMWorkload("TestWorkload")) testWorkload.setDashboardActivity("TestReReco") testWorkload.setSpecUrl("/path/to/workload") testWorkload.setOwnerDetails("sfoulkes", "DMWM", {'dn': 'MyDN'}) procTask = testWorkload.newTask("ProcessingTask") procTask.setTaskType("Processing") procTask.setSplittingAlgorithm("FileBased", files_per_job = 1) procTaskCMSSW = procTask.makeStep("cmsRun1") procTaskCMSSW.setStepType("CMSSW") procTaskCMSSWHelper = procTaskCMSSW.getTypeHelper() procTask.setTaskType("Processing") procTask.setSiteWhitelist(["site1"]) procTask.setSiteBlacklist(["site2"]) procTask.applyTemplates() procTaskCMSSWHelper.addOutputModule("OutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) mergeTask = procTask.addTask("MergeTask") mergeTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") mergeTask.setTaskType("Merge") mergeTask.setSplittingAlgorithm("WMBSMergeBySize", min_merge_size = 1, max_merge_size = 2, max_merge_events = 3) mergeTaskCMSSW = mergeTask.makeStep("cmsRun1") mergeTaskCMSSW.setStepType("CMSSW") mergeTaskCMSSWHelper = mergeTaskCMSSW.getTypeHelper() mergeTask.setTaskType("Merge") mergeTask.applyTemplates() mergeTaskCMSSWHelper.addOutputModule("Merged", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) cleanupTask = procTask.addTask("CleanupTask") cleanupTask.setInputReference(procTaskCMSSW, outputModule = "OutputA") cleanupTask.setTaskType("Merge") cleanupTask.setSplittingAlgorithm("SiblingProcessingBased", files_per_job = 50) cleanupTaskCMSSW = cleanupTask.makeStep("cmsRun1") cleanupTaskCMSSW.setStepType("CMSSW") dummyCleanupTaskCMSSWHelper = cleanupTaskCMSSW.getTypeHelper() cleanupTask.setTaskType("Cleanup") cleanupTask.applyTemplates() skimTask = mergeTask.addTask("SkimTask") skimTask.setTaskType("Skim") skimTask.setInputReference(mergeTaskCMSSW, outputModule = "Merged") skimTask.setSplittingAlgorithm("FileBased", files_per_job = 1, include_parents = True) skimTaskCMSSW = skimTask.makeStep("cmsRun1") skimTaskCMSSW.setStepType("CMSSW") skimTaskCMSSWHelper = skimTaskCMSSW.getTypeHelper() skimTask.setTaskType("Skim") skimTask.applyTemplates() skimTaskCMSSWHelper.addOutputModule("SkimOutputA", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) skimTaskCMSSWHelper.addOutputModule("SkimOutputB", primaryDataset = "bogusPrimary", processedDataset = "bogusProcessed", dataTier = "DataTierA", lfnBase = "bogusUnmerged", mergedLFNBase = "bogusMerged", filterName = None) return testWorkload def setupMCWMSpec(self): """Setup MC workflow""" self.wmspec = self.createMCWMSpec() self.topLevelTask = getFirstTask(self.wmspec) self.inputDataset = self.topLevelTask.inputDataset() self.dataset = self.topLevelTask.getInputDatasetPath() self.dbs = None self.siteDB = SiteDBJSON() # add sites that would normally be added by operator via resource_control locationDAO = self.daoFactory(classname = "Locations.New") self.pnns = [] for site in ['T2_XX_SiteA', 'T2_XX_SiteB']: locationDAO.execute(siteName = site, pnn = self.siteDB.cmsNametoPhEDExNode(site)[0]) self.pnns.append(self.siteDB.cmsNametoPhEDExNode(site)[0]) def createWMSpec(self, name = 'ReRecoWorkload'): factory = ReRecoWorkloadFactory() rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"]) wmspec = factory.factoryWorkloadConstruction(name, rerecoArgs) wmspec.setSpecUrl("/path/to/workload") wmspec.setSubscriptionInformation(custodialSites = [], nonCustodialSites = [], autoApproveSites = [], priority = "Low", custodialSubType = "Move") return wmspec def createMCWMSpec(self, name='MonteCarloWorkload'): mcArgs['CouchDBName'] = rerecoArgs["CouchDBName"] mcArgs["ConfigCacheID"] = createConfig(mcArgs["CouchDBName"]) wmspec = monteCarloWorkload(name, mcArgs) wmspec.setSpecUrl("/path/to/workload") getFirstTask(wmspec).addProduction(totalevents=10000) return wmspec def getDBS(self, wmspec): topLevelTask = getFirstTask(wmspec) inputDataset = topLevelTask.inputDataset() dbs = DBSReader(inputDataset.dbsurl) #dbsDict = {self.inputDataset.dbsurl : self.dbs} return dbs def createWMBSHelperWithTopTask(self, wmspec, block, mask = None, parentFlag = False, detail = False): topLevelTask = getFirstTask(wmspec) wmbs = WMBSHelper(wmspec, topLevelTask.name(), block, mask, cachepath = self.workDir) if block: if parentFlag: block = self.dbs.getFileBlockWithParents(block)[block] else: block = self.dbs.getFileBlock(block)[block] sub, files = wmbs.createSubscriptionAndAddFiles(block = block) if detail: return wmbs, sub, files else: return wmbs def testKillWorkflow(self): """ _testKillWorkflow_ Verify that workflow killing works correctly. """ configFile = EmulatorSetup.setupWMAgentConfig() config = loadConfigurationFile(configFile) baAPI = BossAirAPI(config = config) # Create nine jobs self.setupForKillTest(baAPI = baAPI) self.assertEqual(len(baAPI._listRunJobs()), 9) killWorkflow("Main", config, config) self.verifyFileKillStatus() self.verifyJobKillStatus() self.assertEqual(len(baAPI._listRunJobs()), 8) EmulatorSetup.deleteConfig(configFile) return def testCreateSubscription(self): """ _testCreateSubscription_ Verify that the subscription creation code works correctly. """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', pnn = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath = self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask") procWorkflow.load() self.assertEqual(procWorkflow.owner, "sfoulkes", "Error: Wrong owner: %s" % procWorkflow.owner) self.assertEqual(procWorkflow.group, "DMWM", "Error: Wrong group: %s" % procWorkflow.group) self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.") self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(procWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"] unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"] mergedProcOutput.loadData() unmergedProcOutput.loadData() self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA", "Error: Unmerged output fileset is wrong.") mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/CleanupTask") cleanupWorkflow.load() self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock") topLevelFileset.loadData() procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow) procSubscription.loadData() self.assertEqual(len(procSubscription.getWhiteBlackList()), 2, "Error: Wrong site white/black list for proc sub.") for site in procSubscription.getWhiteBlackList(): if site["site_name"] == "site1": self.assertEqual(site["valid"], 1, "Error: Site should be white listed.") else: self.assertEqual(site["valid"], 0, "Error: Site should be black listed.") self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testTruncatedWFInsertion(self): """ _testTruncatedWFInsertion_ """ resourceControl = ResourceControl() resourceControl.insertSite(siteName = 'site1', pnn = 'goodse.cern.ch', ceName = 'site1', plugin = "TestPlugin") resourceControl.insertSite(siteName = 'site2', pnn = 'goodse2.cern.ch', ceName = 'site2', plugin = "TestPlugin") testWorkload = self.createTestWMSpec() testTopLevelTask = getFirstTask(testWorkload) testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath = self.workDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset) testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask", "someserver", "somedatabase") # create the subscription for multiple top task (MergeTask and CleanupTask for the same block) for task in testWorkload.getTopLevelTask(): testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath = self.workDir) testResubmitWMBSHelper.createTopLevelFileset() testResubmitWMBSHelper._createSubscriptionsInWMBS(task, testResubmitWMBSHelper.topLevelFileset) mergeWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask") mergeWorkflow.load() self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1, "Error: Wrong number of WF outputs.") unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] unmergedMergeOutput.loadData() self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged", "Error: Unmerged output fileset is wrong.") skimWorkflow = Workflow(name = "ResubmitTestWorkload", task = "/ResubmitTestWorkload/MergeTask/SkimTask") skimWorkflow.load() self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.") self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"), "Error: Wrong spec URL") self.assertEqual(len(skimWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"] unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"] mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"] unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"] mergedSkimOutputA.loadData() mergedSkimOutputB.loadData() unmergedSkimOutputA.loadData() unmergedSkimOutputB.loadData() self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name) self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA", "Error: Unmerged output fileset is wrong.") self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Merged output fileset is wrong.") self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB", "Error: Unmerged output fileset is wrong.") topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2") topLevelFileset.loadData() mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0, "Error: Wrong white/black list for merge sub.") self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.") skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow) skimSubscription.loadData() self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.") self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.") return def testReReco(self): """ReReco workflow""" # create workflow block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)) self.assertEqual(len(files), 1) def testReRecoBlackRunRestriction(self): """ReReco workflow with Run restrictions""" block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunBlacklist([181183]) # Set run blacklist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testReRecoWhiteRunRestriction(self): block = self.dataset + "#" + BLOCK2 self.topLevelTask.setInputRunWhitelist([181183]) # Set run whitelist to only run in the block wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsOK(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['181367'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['57,80'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 1) def testLumiMaskRestrictionsKO(self): block = self.dataset + "#" + BLOCK1 self.wmspec.getTopLevelTask()[0].data.input.splitting.runs = ['123454321'] self.wmspec.getTopLevelTask()[0].data.input.splitting.lumis = ['123,123'] wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) files = wmbs.validFiles(self.dbs.getFileBlock(block)[block]['Files']) self.assertEqual(len(files), 0) def testDuplicateFileInsert(self): # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) firstFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(firstFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) # use the new spec with same inputdataset block = self.dataset + "#" + BLOCK1 wmspec = self.createWMSpec("TestSpec1") dbs = self.getDBS(wmspec) wmbs = self.createWMBSHelperWithTopTask(wmspec, block) # check duplicate insert dbsFiles = dbs.getFileBlock(block)[block]['Files'] numOfFiles = wmbs.addFiles(dbs.getFileBlock(block)[block]) self.assertEqual(numOfFiles, 0) secondFileset = wmbs.topLevelFileset wmbsDao = wmbs.daofactory(classname = "Files.InFileset") numOfFiles = len(wmbsDao.execute(secondFileset.id)) self.assertEqual(numOfFiles, len(dbsFiles)) self.assertNotEqual(firstFileset.id, secondFileset.id) def testDuplicateSubscription(self): """Can't duplicate subscriptions""" # using default wmspec block = self.dataset + "#" + BLOCK1 wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. dbsFiles = self.dbs.getFileBlock(block)[block]['Files'] self.assertEqual(numOfFiles, len(dbsFiles)) # Not clear what's supposed to happen here, 2nd test is completely redundant dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, len(dbsFiles)) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, block) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) # now do a montecarlo workflow self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() numOfFiles = len(wmbs.topLevelFileset.files) filesetId = wmbs.topLevelFileset.id subId = wmbs.topLevelSubscription['id'] # check initially inserted files. # Not clear what's supposed to happen here, 2nd test is completely redundant numDbsFiles = 1 self.assertEqual(numOfFiles, numDbsFiles) dummyFirstFileset = wmbs.topLevelFileset self.assertEqual(numOfFiles, numDbsFiles) # reinsert subscription - shouldn't create anything new wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) wmbs.topLevelFileset.loadData() self.assertEqual(numOfFiles, len(wmbs.topLevelFileset.files)) self.assertEqual(filesetId, wmbs.topLevelFileset.id) self.assertEqual(subId, wmbs.topLevelSubscription['id']) def testParentage(self): """ 1. check whether parent files are created in wmbs. 2. check parent files are associated to child. 3. When 2 specs with the same input data (one with parent processing, one without it) is inserted, if one without parent processing inserted first then the other with parent processing insert, it still needs to create parent files although child files are duplicate """ # Swap out the dataset for one that has parents task = next(self.wmspec.taskIterator()) oldDS = task.inputDataset() # Copy the old dataset, only will use DBS URL from it task.addInputDataset(dbsurl=oldDS.dbsurl, primary='Cosmics', processed='ComissioningHI-PromptReco-v1', tier='RECO') block = '/Cosmics/ComissioningHI-PromptReco-v1/RECO' + '#5b89ba9c-0dbf-11e1-9b6c-003048caaace' # File creation without parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=False, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 0) # no parents per child # File creation with parents wmbs, _, numFiles = self.createWMBSHelperWithTopTask(self.wmspec, block, parentFlag=True, detail=True) self.assertEqual(8, numFiles) wmbs.topLevelFileset.loadData() for child in wmbs.topLevelFileset.files: self.assertEqual(len(child["parents"]), 1) # one parent per child def testMCFakeFileInjection(self): """Inject fake Monte Carlo files into WMBS""" # This test is failing because the name of the couch DB is set to None # in TestMonteCarloWorkloadFactory.getMCArgs() but changing it to # "reqmgr_config_cache_t" from StdBase test arguments does not fix the # situation. testDuplicateSubscription probably has the same issue self.setupMCWMSpec() mask = Mask(FirstRun = 12, FirstLumi = 1234, FirstEvent = 12345, LastEvent = 999995, LastLumi = 12345, LastRun = 12) wmbs = self.createWMBSHelperWithTopTask(self.wmspec, None, mask) subscription = wmbs.topLevelSubscription self.assertEqual(1, subscription.exists()) fileset = subscription['fileset'] self.assertEqual(1, fileset.exists()) fileset.loadData() # need to refresh from database self.assertEqual(len(fileset.files), 1) self.assertEqual(len(fileset.parents), 0) self.assertFalse(fileset.open) firstFile = list(fileset.files)[0] self.assertEqual(firstFile['events'], mask['LastEvent'] - mask['FirstEvent'] + 1) # inclusive range self.assertEqual(firstFile['merged'], False) # merged files get added to dbs self.assertEqual(len(firstFile['parents']), 0) #firstFile.loadData() self.assertEqual(sorted(firstFile['locations']), sorted(self.pnns)) self.assertEqual(len(firstFile.getParentLFNs()), 0) self.assertEqual(len(firstFile.getRuns()), 1) run = firstFile.getRuns()[0] self.assertEqual(run.run, mask['FirstRun']) self.assertEqual(run.lumis[0], mask['FirstLumi']) self.assertEqual(run.lumis[-1], mask['LastLumi']) self.assertEqual(len(run.lumis), mask['LastLumi'] - mask['FirstLumi'] + 1)