def testDeleteTransaction(self):
    """
    _testDeleteTransaction_

    Check that deleting a workflow inside a transaction is undone by a
    rollback.

    The workflow's exists() method is consulted at four points: before
    create() (must not exist), after create() (must exist), after delete()
    inside the open transaction (must not exist) and after the transaction
    has been rolled back (must exist again).
    """
    workflow = Workflow(spec="spec.xml", owner="Simon",
                        name="wf001", task='Test')

    # Nothing has been written to the database yet.
    self.assertEqual(workflow.exists(), False,
                     "ERROR: Workflow exists before it was created")

    workflow.create()
    self.assertTrue(workflow.exists() > 0,
                    "ERROR: Workflow does not exist after it has been created")

    # The delete happens inside an explicit transaction so that it can be
    # reverted afterwards.
    currentThread = threading.currentThread()
    currentThread.transaction.begin()

    workflow.delete()
    self.assertEqual(workflow.exists(), False,
                     "ERROR: Workflow exists after it has been deleted")

    currentThread.transaction.rollback()
    self.assertTrue(workflow.exists() > 0,
                    "ERROR: Workflow does not exist transaction was rolled back")
def load(self):
    """
    _load_

    Load the meta data of the subscription: id, type, split algorithm,
    fileset id and workflow id.

    When the subscription id is positive it is used for the lookup,
    otherwise the ids of the attached fileset and workflow are used, so one
    of the two must be set before calling this method.
    """
    # Value returned by beginTransaction(); handed back to
    # commitTransaction() at the end.
    transactionState = self.beginTransaction()

    if self["id"] > 0:
        loader = self.daofactory(classname="Subscriptions.LoadFromID")
        lookup = {"id": self["id"]}
    else:
        loader = self.daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
        lookup = {"fileset": self["fileset"].id,
                  "workflow": self["workflow"].id}

    result = loader.execute(conn=self.getDBConn(),
                            transaction=self.existingTransaction(),
                            **lookup)

    for key in ("type", "id", "split_algo"):
        self[key] = result[key]

    # Only replace the fileset and workflow if they haven't been loaded
    # already (a negative id marks an unloaded object).
    if self["fileset"].id < 0:
        self["fileset"] = Fileset(id=result["fileset"])

    if self["workflow"].id < 0:
        self["workflow"] = Workflow(id=result["workflow"])

    self.commitTransaction(transactionState)
    return
def createJobs(self, nJobs):
    """
    Create a job group holding nJobs jobs ready for submission.

    A dummy workflow, fileset and subscription are created first, then the
    job group, and finally nJobs jobs which are created in and added to
    that group.

    :param nJobs: number of jobs to create
    :returns: the committed JobGroup containing the new jobs
    """
    workflow = Workflow(spec="dummy", owner="mnorman",
                        name="dummy", task="basicWorkload/Production")
    workflow.create()

    # Fileset -> Subscription -> JobGroup chain
    fileset = Fileset(name="dummy")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                type="Processing",
                                split_algo="FileBased")
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Populate the group
    for jobIndex in range(nJobs):
        job = Job(name='Job_%i' % (jobIndex))
        job['owner'] = "mnorman"
        job['location'] = 'Xanadu'
        job.create(jobGroup)
        jobGroup.add(job)

    fileset.commit()
    jobGroup.commit()

    return jobGroup
def setUp(self):
    """
    _setUp_

    Prepare the WMBS test environment: initialise logging, the database
    connection and the WMBS schema, register two locations, create the
    workflow shared by the tests and define default performance parameters.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"],
                            useDefault=False)

    currentThread = threading.currentThread()
    wmbsDAOFactory = DAOFactory(package="WMCore.WMBS",
                                logger=currentThread.logger,
                                dbinterface=currentThread.dbi)

    # Register the two sites used by the tests.
    newLocation = wmbsDAOFactory(classname="Locations.New")
    for site, storageElement in (("s1", "somese.cern.ch"),
                                 ("s2", "otherse.cern.ch")):
        newLocation.execute(siteName=site, seName=storageElement)

    self.testWorkflow = Workflow(spec="spec.xml", owner="Steve",
                                 name="wf001", task="Test")
    self.testWorkflow.create()

    self.performanceParams = {
        'timePerEvent': 12,
        'memoryRequirement': 2300,
        'sizePerEvent': 400,
    }

    return
def generateFakeMCFile(self, numEvents=100, firstEvent=1, lastEvent=100,
                       firstLumi=1, lastLumi=10, index=1, existingSub=None):
    """
    _generateFakeMCFile_

    Create a fake MC file for testing EventBased job creation in
    production workflows.

    The file is added to the fileset of existingSub when one is given;
    otherwise a new single-file fileset, a workflow and a Production
    subscription are created for it.

    :returns: the subscription whose fileset now contains the new file
    """
    # MC comes with MCFakeFile(s)
    fakeFile = File("MCFakeFile-some-hash-%s" % str(index).zfill(5),
                    size=1000,
                    events=numEvents,
                    locations={"T1_US_FNAL_Disk"})
    fakeFile.addRun(Run(1, *range(firstLumi, lastLumi + 1)))
    fakeFile["first_event"] = firstEvent
    fakeFile["last_event"] = lastEvent
    fakeFile.create()

    if existingSub is not None:
        # Reuse the caller's subscription: just grow its fileset.
        existingSub['fileset'].addFile(fakeFile)
        existingSub['fileset'].commit()
        return existingSub

    fileset = Fileset(name="MCTestFileset-%i" % index)
    fileset.create()
    fileset.addFile(fakeFile)
    fileset.commit()

    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="EventBased",
                                type="Production")
    subscription.create()
    return subscription
def createTestJob(self, subscriptionType = "Merge"):
    """
    _createTestJob_

    Build a test job that has two input files, together with the workflow,
    fileset, subscription and job group it needs.

    :param subscriptionType: type given to the subscription that is created
    :returns: the created Job, with its input files associated
    """
    workflow = Workflow(spec=makeUUID(), owner="Simon",
                        name=makeUUID(), task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                type=subscriptionType)
    subscription.create()

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Two input files, each carrying a single lumi of run 1.
    inputA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
    inputA.addRun(Run(1, 45))
    inputB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
    inputB.addRun(Run(1, 46))
    inputA.create()
    inputB.create()

    job = Job(name=makeUUID(), files=[inputA, inputB])
    job["couch_record"] = "somecouchrecord"
    job["location"] = "test.site.ch"
    job.create(group=jobGroup)
    job.associateFiles()

    return job
def testRepack(self):
    """
    _testRepack_

    Create a Repack workflow and verify it installs into WMBS correctly.

    The test builds the workload from the factory's test arguments,
    creates the top level fileset and subscriptions through WMBSHelper and
    then checks, for the Repack task and each of its merge, cleanup and
    log-collect children, the output fileset names as well as the type and
    split algorithm of the subscriptions.
    """
    testArguments = RepackWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))

    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload",
                                                       testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Repack",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # --- Output filesets of the top level Repack task ---
    repackWorkflow = Workflow(name="TestWorkload",
                              task="/TestWorkload/Repack")
    repackWorkflow.load()
    # One output per configured "Outputs" entry plus one more (logArchive
    # is looked up below).
    self.assertEqual(len(repackWorkflow.outputMap),
                     len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Repack WF.")

    goldenOutputMods = {
        "write_PrimaryDataset1_RAW": "RAW",
        "write_PrimaryDataset2_RAW": "RAW",
    }
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # The merged fileset name is not checked for the first primary
        # dataset.
        if goldenOutputMod != "write_PrimaryDataset1_RAW":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Repack/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Output filesets of the RepackMerge tasks ---
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        # Both entries of a merge task are expected to point at the
        # merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Top level subscription ---
    topLevelFileset = Fileset(name="TestWorkload-Repack")
    topLevelFileset.loadData()

    repackSubscription = Subscription(fileset=topLevelFileset,
                                      workflow=repackWorkflow)
    repackSubscription.loadData()

    self.assertEqual(repackSubscription["type"], "Repack",
                     "Error: Wrong subscription type.")
    self.assertEqual(repackSubscription["split_algo"], "Repack",
                     "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

    # --- Merge subscriptions ---
    unmergedOutputs = {
        "write_PrimaryDataset1_RAW": "RAW",
        "write_PrimaryDataset2_RAW": "RAW",
    }
    for unmergedOutput, tier in viewitems(unmergedOutputs):
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions ---
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscription of the Repack task ---
    repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
    repackLogCollect.loadData()
    repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                        task="/TestWorkload/Repack/LogCollect")
    repackLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=repackLogCollect,
                                 workflow=repackLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions of the RepackMerge tasks ---
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        repackMergeLogCollect = Fileset(
            name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
        repackMergeLogCollect.loadData()
        repackMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" % (goldenOutputMod,
                                                                                  goldenOutputMod))
        repackMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                              workflow=repackMergeLogCollectWorkflow)
        logCollectSubscription.loadData()

        # Assert on the subscription loaded in this iteration, not on the
        # Repack-level logCollectSub checked above.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    return
def _commonMonteCarloTest(self):
    """
    Retrieve the MonteCarlo workload from WMBS and test all its properties.

    Checks the output filesets of the Production task and of each
    ProductionMerge task, then the type and split algorithm of the
    production, merge, cleanup and log-collect subscriptions.
    """
    goldenOutputMods = {"OutputA": "RECO", "OutputB": "USER"}

    # --- Output filesets of the Production task ---
    prodWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/Production")
    prodWorkflow.load()

    self.assertEqual(len(prodWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")

    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = prodWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = prodWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Production/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Output filesets of the ProductionMerge tasks ---
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        # Both entries of a merge task are expected to point at the
        # merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Production subscription ---
    topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
    topLevelFileset.loadData()

    prodSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=prodWorkflow)
    prodSubscription.loadData()

    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions ---
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset)
        unmergedOutput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions ---
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged,
                                           workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- LogCollect subscription of the Production task ---
    procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Production/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # --- LogCollect subscriptions of the ProductionMerge tasks ---
    for goldenOutputMod in goldenOutputMods:
        mergeLogCollect = Fileset(
            name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (goldenOutputMod,
                                                                                              goldenOutputMod))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect,
                                     workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def injectJobs(self):
    """
    _injectJobs_

    Inject two workflows into WMBS and save the job objects to disk.

    Ten input files are created. For each file one job is created in the
    job group of workflow A (site T1_US_FNAL) and one in the job group of
    workflow B (site T1_UK_RAL). Every job gets its own cache directory
    under self.testDir, is pickled to "job.pkl" in that directory and is
    then moved from the "new" to the "created" state.
    """
    testWorkflowA = Workflow(spec="specA.pkl", owner="Steve",
                             name="wf001", task="TestTaskA")
    testWorkflowA.create()
    testWorkflowB = Workflow(spec="specB.pkl", owner="Steve",
                             name="wf002", task="TestTaskB")
    testWorkflowB.create()

    testFileset = Fileset("testFileset")
    testFileset.create()

    testSubA = Subscription(fileset=testFileset, workflow=testWorkflowA)
    testSubA.create()
    testSubB = Subscription(fileset=testFileset, workflow=testWorkflowB)
    testSubB.create()

    testGroupA = JobGroup(subscription=testSubA)
    testGroupA.create()
    testGroupB = JobGroup(subscription=testSubB)
    testGroupB.create()

    stateChanger = ChangeState(self.createConfig(), "jobsubmittercaching_t")

    # (name label, job group, possible processing site) of the two jobs
    # created for every input file.
    jobVariants = (("A", testGroupA, "T1_US_FNAL"),
                   ("B", testGroupB, "T1_UK_RAL"))

    for i in range(10):
        newFile = File(lfn="testFile%s" % i,
                       locations=set(["se.T1_US_FNAL", "se.T1_UK_RAL"]))
        newFile.create()

        for label, jobGroup, site in jobVariants:
            newJob = Job(name="testJob%s-%s" % (label, i), files=[newFile])
            # NOTE(review): both variants are tagged with "wf001" although
            # the B jobs live in the job group of wf002 - kept as in the
            # original, confirm whether this is intended.
            newJob["workflow"] = "wf001"
            newJob["possiblePSN"] = [site]
            newJob["sandbox"] = "%s/somesandbox" % self.testDir
            newJob["owner"] = "Steve"

            jobCacheDir = os.path.join(self.testDir, "job%s-%s" % (label, i))
            os.mkdir(jobCacheDir)
            newJob["cache_dir"] = jobCacheDir
            newJob["type"] = "Processing"
            newJob['requestType'] = 'ReReco'
            newJob.create(jobGroup)

            # The context manager closes the file even if pickling fails.
            with open(os.path.join(jobCacheDir, "job.pkl"), "wb") as jobHandle:
                pickle.dump(newJob, jobHandle)

            stateChanger.propagate([newJob], "created", "new")

    return
def setUp(self):
    """
    _setUp_

    Prepare the database and the objects used by the job splitting tests.

    Sets up the DBS3Buffer and T0 WMBS schemas, inserts one location with
    its PNN, one run with lumi sections 1-4 and the "Express" stream with
    its fileset, then creates two workflows with an Express and an
    ExpressMerge subscription and registers the second fileset as output
    of the first workflow. Finally the default split arguments are
    defined.
    """
    import WMQuality.TestInit
    WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()

    self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # One site, one PNN and the association between them.
    myThread.dbi.processData("""INSERT INTO wmbs_location
                                (id, site_name, state, state_time)
                                VALUES (1, 'SomeSite', 1, 1)
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                (id, pnn)
                                VALUES (2, 'SomePNN')
                                """, transaction=False)
    myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                (location, pnn)
                                VALUES (1, 2)
                                """, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    insertRunDAO.execute(binds={'RUN': 1,
                                'HLTKEY': "someHLTKey"},
                         transaction=False)

    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    for lumi in range(1, 5):
        insertLumiDAO.execute(binds={'RUN': 1,
                                     'LUMI': lumi},
                              transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "Express"},
                            transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

    # TestFileset1 was inserted by the stream fileset DAO above, so it is
    # loaded; TestFileset2 is created here.
    fileset1 = Fileset(name="TestFileset1")
    self.fileset2 = Fileset(name="TestFileset2")
    fileset1.load()
    self.fileset2.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow2 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow2", task="Test")
    workflow1.create()
    workflow2.create()

    self.subscription1 = Subscription(fileset=fileset1,
                                      workflow=workflow1,
                                      split_algo="Express",
                                      type="Express")
    self.subscription2 = Subscription(fileset=self.fileset2,
                                      workflow=workflow2,
                                      split_algo="ExpressMerge",
                                      type="ExpressMerge")
    self.subscription1.create()
    self.subscription2.create()

    # The ids are integers generated by the database, not external input.
    myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                VALUES (%d, 'SOMEOUTPUT', %d)
                                """ % (workflow1.id, self.fileset2.id),
                             transaction=False)

    # keep for later
    self.insertSplitLumisDAO = daoFactory(classname="JobSplitting.InsertSplitLumis")

    # default split parameters
    self.splitArgs = {}
    self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
    # Plain integer: a trailing comma here would store the tuple (500,).
    self.splitArgs['maxInputFiles'] = 500
    self.splitArgs['maxLatency'] = 15 * 23

    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Insert a workload into WMBS, truncate it at the merge task and insert
    the resulting resubmission workload as well. Then verify owner, spec
    location, output filesets and subscriptions of the truncated merge
    workflow and of its skim child.
    """
    resourceControl = ResourceControl()
    for site, storageElement in (("site1", "goodse.cern.ch"),
                                 ("site2", "goodse2.cern.ch")):
        resourceControl.insertSite(siteName=site, seName=storageElement,
                                   ceName=site, plugin="TestPlugin")

    # Insert the original workload.
    workload = self.createTestWMSpec()
    firstTask = getFirstTask(workload)
    wmbsHelper = WMBSHelper(workload, firstTask.name(), "SomeBlock",
                            cachepath=self.workDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper._createSubscriptionsInWMBS(firstTask,
                                          wmbsHelper.topLevelFileset)

    workload.truncate("ResubmitTestWorkload",
                      "/TestWorkload/ProcessingTask/MergeTask",
                      "someserver", "somedatabase")

    # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
    for task in workload.getTopLevelTask():
        resubmitHelper = WMBSHelper(workload, task.name(), "SomeBlock2",
                                    cachepath=self.workDir)
        resubmitHelper.createTopLevelFileset()
        resubmitHelper._createSubscriptionsInWMBS(task,
                                                  resubmitHelper.topLevelFileset)

    # --- Merge workflow ---
    mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                             task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()

    expectedMergeSpec = os.path.join(self.workDir, mergeWorkflow.name,
                                     "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec, expectedMergeSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()

    self.assertEqual(unmergedMergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # --- Skim workflow ---
    skimWorkflow = Workflow(name="ResubmitTestWorkload",
                            task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()

    expectedSkimSpec = os.path.join(self.workDir, skimWorkflow.name,
                                    "WMSandbox", "WMWorkload.pkl")
    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec, expectedSkimSpec,
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

    for skimFileset in (mergedSkimOutputA, mergedSkimOutputB,
                        unmergedSkimOutputA, unmergedSkimOutputB):
        skimFileset.loadData()

    # For the skim task the merged and unmerged entries are both expected
    # to be the unmerged fileset.
    self.assertEqual(mergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # --- Subscriptions ---
    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
def pollSubscriptions(self):
    """
    Poller for looking in all active subscriptions for jobs that need to be made.

    For every subscription id returned by self.subscriptionList the
    subscription, its workflow and the workload spec are loaded. A job
    splitting factory is opened for the subscription and iterated; each
    batch of job groups it yields is processed inside one transaction:
    the jobs are created through creatorProcess, their cache directories
    are stored in bulk and the job groups are advanced via
    self.advanceJobGroup.

    Subscriptions that cannot be loaded, have no task/workload or whose
    generators fail to load are logged and skipped.

    :raises WMException: re-raised unchanged from the bulk cache update
    :raises JobCreatorException: on any other error in the bulk cache update
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    # First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id=subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            continue

        workflow = Workflow(id=wmbsSubscription["workflow"].id)
        workflow.load()
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow=workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n", subscriptionID, workflow.id)

        # retrieve information from the workload to propagate down to the job configuration
        allowOpport = wmWorkload.getAllowOpportunistic()

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception as ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next iteration!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i", subscriptionID, self.limit)

        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s", splitParams)

        # Load the proper job splitting module
        splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
        # and return an instance of the splitting algorithm
        wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                         subscription=wmbsSubscription,
                                         generators=seederList,
                                         limit=self.limit)

        # Turn on the jobFactory --> get available files for that subscription, keep result proxies
        wmbsJobFactory.open()

        # From here on the factory is open: make sure it is closed again
        # even if processing this subscription raises.
        try:
            # Create a function to hold it, calling __call__ from the JobFactory
            # which then calls algorithm method of the job splitting algo instance
            jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                               splitParams=splitParams)

            # Now we get to find out how many jobs there are.
            jobNumber = self.countJobs.execute(workflow=workflow.id,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database.", jobNumber, workflow.name)

            while True:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = next(jobSplittingFunction)
                    logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i", subscriptionID)
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i", subscriptionID)
                    myThread.transaction.commit()
                    break

                # Owner details are the same for every job of this batch,
                # so fetch them once.
                owner = wmWorkload.getOwner()

                # Assemble a dict of all the info
                processDict = {'workflow': workflow,
                               'wmWorkload': wmWorkload,
                               'wmTaskName': wmTask.getPathName(),
                               'jobNumber': jobNumber,
                               'sandbox': wmTask.data.input.sandbox,
                               'owner': owner.get('name', None),
                               'ownerDN': owner.get('dn', None),
                               'ownerGroup': owner.get('vogroup', ''),
                               'ownerRole': owner.get('vorole', ''),
                               'numberOfCores': 1,
                               'inputDataset': wmTask.getInputDatasetPath(),
                               'inputPileup': wmTask.getInputPileupDatasets()}
                try:
                    # The job needs as many cores as its most demanding step.
                    maxCores = 1
                    stepNames = wmTask.listAllStepNames()
                    for stepName in stepNames:
                        sh = wmTask.getStep(stepName)
                        maxCores = max(maxCores, sh.getNumberOfCores())
                    processDict.update({'numberOfCores': maxCores})
                except AttributeError:
                    logging.info("Failed to read multicore settings from task %s", wmTask.getPathName())

                tempSubscription = Subscription(id=wmbsSubscription['id'])

                # if we have glideinWMS constraints, then adapt all jobs
                if self.glideinLimits:
                    capResourceEstimates(wmbsJobGroups, self.glideinLimits)

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion(allSteps=True)
                    tempDict['scramArch'] = wmTask.getScramArch()
                    # processDict holds the counter from the start of the
                    # batch; use the running value for this group.
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber
                    tempDict['agentName'] = self.agentName
                    tempDict['inputDatasetLocations'] = wmbsJobGroup.getLocationsForJobs()
                    tempDict['allowOpportunistic'] = allowOpport

                    jobGroup = creatorProcess(work=tempDict,
                                              jobCacheDir=self.jobCacheDir)
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({'jobid': job['id'],
                                             'cacheDir': job['cache_dir']})
                        job["user"] = owner["name"]
                        job["group"] = owner["group"]

                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(jobDictList=nameDictList,
                                                  conn=myThread.transaction.conn,
                                                  transaction=True)
                except WMException:
                    raise
                except Exception as ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    logging.debug("Error while setting bulkCache with following values: %s\n", nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()

            # END: While loop over jobFactory
        finally:
            # Close the jobFactory
            wmbsJobFactory.close()

    return
def _checkTask(self, task, taskConf, centralConf):
    """
    _checkTask_

    Verify that a single task of self.workload was installed correctly.

    Checks, in order: the input step of a child task, the pileup datasets,
    the output filesets of the task workflow (and of the per-output merge
    workflows where one is expected), the primary/processed dataset of
    every non-logArchive output module, and finally the subscription that
    feeds the task.

    :param task: task helper object being verified
    :param taskConf: dictionary with the configuration of this task
    :param centralConf: dictionary with the request-wide configuration;
        only "IgnoredOutputModules" is read here
    """
    taskPath = task.getPathName()
    taskName = task.name()
    # Path of the task with its own name stripped out; used both for the
    # input step and for the name of the input fileset of a child task.
    parentPath = taskPath.replace(taskName, "")

    if taskConf.get("InputTask") is not None:
        inputStep = parentPath + "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inputStep,
                         "Input step is wrong in the spec")
        self.assertIn(taskConf["InputTask"], inputStep,
                      "Input task is not in the path name for child task")

    mcDataset = taskConf.get('MCPileup', None)
    dataDataset = taskConf.get('DataPileup', None)
    if mcDataset:
        self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset, [mcDataset])
    if dataDataset:
        self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset, [dataDataset])

    workflow = Workflow(name=self.workload.name(), task=taskPath)
    workflow.load()

    outputMods = set(outputModuleList(task)) - set(task.getIgnoredOutputModulesForTask())
    self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                     "Error: Wrong number of WF outputs")

    for outputModule in outputMods:
        filesets = workflow.outputMap[outputModule][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']

        merged.loadData()
        unmerged.loadData()

        # A merge task is only expected for kept, non-transient,
        # non-ignored data outputs; everything else stays unmerged.
        hasMergeTask = (outputModule != "logArchive"
                        and bool(taskConf.get("KeepOutput", True))
                        and outputModule not in taskConf.get("TransientOutputModules", [])
                        and outputModule not in centralConf.get("IgnoredOutputModules", []))

        mergeTask = taskPath + "/" + taskName + "Merge" + outputModule
        unmergedset = taskPath + "/unmerged-" + outputModule
        if hasMergeTask:
            mergedset = mergeTask + "/merged-Merged"
        else:
            mergedset = unmergedset

        self.assertEqual(mergedset, merged.name, "Merged fileset name is wrong")
        self.assertEqual(unmergedset, unmerged.name, "Unmerged fileset name is wrong")

        if hasMergeTask:
            mergeWorkflow = Workflow(name=self.workload.name(), task=mergeTask)
            mergeWorkflow.load()
            self.assertIn("Merged", mergeWorkflow.outputMap,
                          "Merge workflow does not contain a Merged output key")
            mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
            mergedFileset = mergedOutputMod['merged_output_fileset']
            unmergedFileset = mergedOutputMod['output_fileset']
            mergedFileset.loadData()
            unmergedFileset.loadData()
            # Both outputs of a merge task point at the merged fileset.
            self.assertEqual(mergedFileset.name, mergedset,
                             "Merged fileset name in merge task is wrong")
            self.assertEqual(unmergedFileset.name, mergedset,
                             "Unmerged fileset name in merge task is wrong")

            logArchFilesets = mergeWorkflow.outputMap['logArchive'][0]
            mrgLogArch = logArchFilesets['merged_output_fileset']
            umrgLogArch = logArchFilesets['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = mergeTask + "/merged-logArchive"
            self.assertEqual(mrgLogArch.name, archName,
                             "LogArchive merged fileset name is wrong in merge task")
            self.assertEqual(umrgLogArch.name, archName,
                             "LogArchive unmerged fileset name is wrong in merge task")

        if outputModule != "logArchive":
            taskOutputMods = task.getOutputModulesForStep(stepName="cmsRun1")
            currentModule = getattr(taskOutputMods, outputModule)
            if taskConf.get("PrimaryDataset") is not None:
                self.assertEqual(currentModule.primaryDataset, taskConf["PrimaryDataset"],
                                 "Wrong primary dataset")

            # The three names must be separate list entries; a single
            # comma-joined string never matches a key of taskConf and
            # silently disables every check below.
            processedDatasetParts = ["AcquisitionEra", "ProcessingString", "ProcessingVersion"]
            processedDataset = currentModule.processedDataset
            allParts = True
            for part in processedDatasetParts:
                if taskConf.get(part) is not None:
                    # Compare the configured value (not the key name)
                    # against the processed dataset name.
                    self.assertIn(str(taskConf[part]), processedDataset,
                                  "Wrong processed dataset for module")
                else:
                    allParts = False
            if allParts:
                # NOTE(review): processed dataset names may carry extra
                # components (e.g. a filter name) between the acquisition
                # era and the processing string, so only the prefix and
                # the suffix are pinned here. Tighten to assertEqual if
                # such components can never occur for these tasks.
                self.assertTrue(
                    processedDataset.startswith("%s-" % taskConf["AcquisitionEra"]),
                    "Wrong processed dataset for module")
                self.assertTrue(
                    processedDataset.endswith("%s-v%s" % (taskConf["ProcessingString"],
                                                          taskConf["ProcessingVersion"])),
                    "Wrong processed dataset for module")

    # Test subscriptions
    if taskConf.get("InputTask") is None:
        inputFileset = "%s-%s-SomeBlock" % (self.workload.name(), taskName)
    elif "Merge" in taskPath.split("/")[-2]:
        inputFileset = parentPath + "merged-Merged"
    else:
        inputFileset = parentPath + "unmerged-%s" % taskConf["InputFromOutputModule"]

    taskFileset = Fileset(name=inputFileset)
    taskFileset.loadData()

    taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
    taskSubscription.loadData()

    if taskConf.get("InputTask") is None and taskConf.get("InputDataset") is None:
        # Production type
        self.assertEqual(taskSubscription["type"], "Production",
                         "Error: Wrong subscription type for processing task")
        self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgo"],
                         "Error: Wrong split algo for generation task")
    else:
        # Processing type
        self.assertEqual(taskSubscription["type"], "Processing",
                         "Wrong subscription type for task")
        if taskSubscription["split_algo"] != "WMBSMergeBySize":
            self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgo'],
                             "Splitting algo mismatch")
        else:
            # parentPath is always bound, unlike a path that was only
            # computed inside the InputTask branches above.
            self.assertEqual(taskFileset.name,
                             parentPath + "unmerged-%s" % taskConf["InputFromOutputModule"],
                             "Subscription uses WMBSMergeBySize on a merge fileset")
    return
def testPrivateMC(self):
    """
    _testPrivateMC_

    Build a PrivateMC workload, create its top level fileset and
    subscription through WMBSHelper and verify the output filesets of the
    resulting workflow as well as the processing and LogCollect
    subscriptions.
    """
    defaultArguments = getTestArguments()
    defaultArguments.update({
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "privatemc_t",
        "AnalysisConfigCacheDoc": self.injectAnalysisConfig(),
        "ProcessingVersion": 1,
    })

    testWorkload = PrivateMCWorkloadFactory()("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    helper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock")
    helper.createTopLevelFileset()
    helper.createSubscription(helper.topLevelTask, helper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/PrivateMC")
    procWorkflow.load()

    outputCount = len(procWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 3,
                     "Error: Wrong number of WF outputs: %s" % outputCount)

    # No merge task here: the "merged" and the plain logArchive filesets
    # are both expected to carry the unmerged name.
    logArchFilesets = procWorkflow.outputMap["logArchive"][0]
    logArchOutput = logArchFilesets["merged_output_fileset"]
    unmergedLogArchOutput = logArchFilesets["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    for logArchFileset in (logArchOutput, unmergedLogArchOutput):
        self.assertEqual(logArchFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    for goldenOutputMod in ("OutputA", "OutputB"):
        modFilesets = procWorkflow.outputMap[goldenOutputMod][0]
        mergedOutput = modFilesets["merged_output_fileset"]
        unmergedOutput = modFilesets["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        expectedName = "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod
        self.assertEqual(mergedOutput.name, expectedName,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, expectedName,
                         "Error: Unmerged output fileset is wrong.")

    # Subscription feeding the top level task
    topLevelFileset = Fileset(name="TestWorkload-PrivateMC-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "PrivateMC",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription on the unmerged logArchive fileset
    procLogCollect = Fileset(name="/TestWorkload/PrivateMC/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/PrivateMC/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Build a MonteCarloFromGEN workload, install it into WMBS through
    WMBSHelper and check the output datasets, the output filesets of the
    processing and merge workflows, and the type and splitting algorithm
    of the processing, merge, cleanup and LogCollect subscriptions.
    """
    arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"
    arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

    testWorkload = MonteCarloFromGENWorkloadFactory().factoryWorkloadConstruction(
        "TestWorkload", arguments)

    outputDatasets = testWorkload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 2)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets)

    productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
    self.assertFalse(productionTask.jobSplittingParameters()["deterministicPileup"])

    helper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock",
                        cachepath=self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()

    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'production')

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]

    # Output filesets of the processing workflow
    for goldenOutputMod in goldenOutputMods:
        modFilesets = procWorkflow.outputMap[goldenOutputMod][0]
        mergedOutput = modFilesets["merged_output_fileset"]
        unmergedOutput = modFilesets["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")

    logArchFilesets = procWorkflow.outputMap["logArchive"][0]
    logArchOutput = logArchFilesets["merged_output_fileset"]
    unmergedLogArchOutput = logArchFilesets["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Output filesets of the per-output merge workflows
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedFilesets = mergeWorkflow.outputMap["Merged"][0]
        mergedMergeOutput = mergedFilesets["merged_output_fileset"]
        unmergedMergeOutput = mergedFilesets["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
            "Error: Unmerged output fileset is wrong.")

        mergeLogArchFilesets = mergeWorkflow.outputMap["logArchive"][0]
        logArchOutput = mergeLogArchFilesets["merged_output_fileset"]
        unmergedLogArchOutput = mergeLogArchFilesets["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
            "Error: LogArchive output fileset is wrong.")

    # Top level (production) subscription
    topLevelFileset = Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions: (unmerged input fileset, merge task path)
    mergeCases = (
        ("/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO"),
        ("/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO"),
    )
    for unmergedName, mergeTaskPath in mergeCases:
        unmergedInput = Fileset(name=unmergedName)
        unmergedInput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload", task=mergeTaskPath)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedInput, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the unmerged outputs
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions: (logArchive fileset, LogCollect task path)
    logCollectCases = (
        ("/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/LogCollect"),
        ("/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect"),
        ("/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive",
         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect"),
    )
    for logFilesetName, logCollectTask in logCollectCases:
        procLogCollect = Fileset(name=logFilesetName)
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload", task=logCollectTask)
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def stuffWMBS(self, workflowURL, name):
    """
    _stuffWMBS_

    Insert dummy data into WMBS to test job creation: one location, two
    filesets ("mergeFileset" and "bogusFileset"), one workflow, a
    ParentlessMergeBySize subscription on the merge fileset, and eleven
    files which are added to both filesets.

    workflowURL is used as the spec of the workflow and name as its name.
    """
    self.daoFactory(classname="Locations.New").execute(siteName="s1",
                                                       seName="somese.cern.ch")

    # Instantiated but not used any further in this method.
    changeStateDAO = self.daoFactory(classname="Jobs.ChangeState")

    mergeFileset = Fileset(name="mergeFileset")
    mergeFileset.create()
    bogusFileset = Fileset(name="bogusFileset")
    bogusFileset.create()

    mergeWorkflow = Workflow(spec=workflowURL, owner="mnorman",
                             name=name, task="/TestWorkload/ReReco")
    mergeWorkflow.create()

    mergeSubscription = Subscription(fileset=mergeFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")
    mergeSubscription.create()
    # NOTE(review): this subscription object is built but create() is
    # never called on it here - confirm that is intended.
    bogusSubscription = Subscription(fileset=bogusFileset,
                                     workflow=mergeWorkflow,
                                     split_algo="ParentlessMergeBySize")

    # (lfn, size, events, first_event, run, lumi)
    fileSpecs = (
        ("file1", 1024, 1024, 0, 1, 45),
        ("file2", 1024, 1024, 1024, 1, 45),
        ("file3", 1024, 1024, 2048, 1, 45),
        ("file4", 1024, 1024, 3072, 1, 45),
        ("fileA", 1024, 1024, 0, 1, 46),
        ("fileB", 1024, 1024, 1024, 1, 46),
        ("fileC", 1024, 1024, 2048, 1, 46),
        ("fileI", 1024, 1024, 0, 2, 46),
        ("fileII", 1024, 1024, 1024, 2, 46),
        ("fileIII", 1024, 102400, 2048, 2, 46),
        ("fileIV", 102400, 1024, 3072, 2, 46),
    )

    injectedFiles = []
    for lfn, size, events, firstEvent, runNumber, lumi in fileSpecs:
        wmbsFile = File(lfn=lfn, size=size, events=events,
                        first_event=firstEvent,
                        locations=set(["somese.cern.ch"]))
        wmbsFile.addRun(Run(runNumber, lumi))
        wmbsFile.create()
        injectedFiles.append(wmbsFile)

    # Every file goes into both filesets, in creation order.
    for wmbsFile in injectedFiles:
        mergeFileset.addFile(wmbsFile)
        bogusFileset.addFile(wmbsFile)

    mergeFileset.commit()
    bogusFileset.commit()

    return
def setUp(self):
    """
    _setUp_

    Initialise logging, the database connection and the T0.WMBS schema,
    then populate the database for the test: one location, run 1 with one
    lumi section, the "Express" stream with its fileset, streamer and
    prompt calibration records, a "Condition" subscription on
    TestFileset1, and the parentage chain
    /streamer -> /alcareco -> /alcaprompt -> /sqlite.
    Default splitting arguments are stored in self.splitArgs.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    myThread = threading.currentThread()
    dbi = myThread.dbi

    daoFactory = DAOFactory(package="T0.WMBS", logger=logging, dbinterface=dbi)
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=dbi)

    dbi.processData("""INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """,
                    transaction=False)
    dbi.processData("""INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE') """,
                    transaction=False)

    # Run / lumi / stream bookkeeping
    daoFactory(classname="RunConfig.InsertRun").execute(
        binds={'RUN': 1, 'TIME': int(time.time()), 'HLTKEY': "someHLTKey"},
        transaction=False)

    daoFactory(classname="RunConfig.InsertLumiSection").execute(
        binds={'RUN': 1, 'LUMI': 1},
        transaction=False)

    daoFactory(classname="RunConfig.InsertStream").execute(
        binds={'STREAM': "Express"},
        transaction=False)

    daoFactory(classname="RunConfig.InsertStreamFileset").execute(
        1, "Express", "TestFileset1")

    daoFactory(classname="RunConfig.InsertStreamer").execute(
        binds={'RUN': 1,
               'LUMI': 1,
               'STREAM': "Express",
               'TIME': int(time.time()),
               'LFN': "/streamer",
               'FILESIZE': 0,
               'EVENTS': 0},
        transaction=False)

    daoFactory(classname="RunConfig.InsertPromptCalibration").execute(
        {'RUN': 1, 'STREAM': "Express"},
        transaction=False)

    self.fileset1 = Fileset(name="TestFileset1")
    self.fileset1.create()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel",
                         name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Condition",
                                      type="Condition")
    self.subscription1.create()

    # set parentage chain and sqlite fileset
    for lfn in ("/alcareco", "/alcaprompt"):
        intermediateFile = File(lfn, size=0, events=0)
        intermediateFile.addRun(Run(1, 1))
        intermediateFile.setLocation("SomeSE", immediateSave=False)
        intermediateFile.create()

    sqliteFile = File("/sqlite", size=0, events=0)
    sqliteFile.create()
    self.fileset1.addFile(sqliteFile)
    self.fileset1.commit()

    # The fetched rows are not used afterwards; the query is still issued.
    results = dbi.processData("""SELECT lfn FROM wmbs_file_details """,
                              transaction=False)[0].fetchall()

    parentage = [{'parent': "/streamer", 'child': "/alcareco"},
                 {'parent': "/alcareco", 'child': "/alcaprompt"},
                 {'parent': "/alcaprompt", 'child': "/sqlite"}]
    wmbsDaoFactory(classname="Files.SetParentage").execute(binds=parentage,
                                                           transaction=False)

    # default split parameters
    self.splitArgs = {}
    self.splitArgs['runNumber'] = 1
    self.splitArgs['streamName'] = "Express"

    return
def setUp(self):
    """
    _setUp_

    Initialise the DBS3Buffer and WMBS schemas and create the fixtures
    used by the tests: an input fileset holding one file, a processing
    workflow and a merge workflow with their output filesets, one
    subscription and one job group for each workflow, and a single job
    (self.testJob) in the processing job group whose state is set to
    "complete". A scratch directory is created in self.tempDir.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMComponent.DBS3Buffer", "WMCore.WMBS"],
                            useDefault=False)

    myThread = threading.currentThread()
    self.daofactory = DAOFactory(package="WMCore.WMBS",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)
    self.dbsfactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                 logger=myThread.logger,
                                 dbinterface=myThread.dbi)
    self.daofactory(classname="Locations.New").execute(siteName="site1",
                                                       pnn="T1_US_FNAL_Disk")

    # Filesets
    inputFile = File(lfn="/path/to/some/lfn", size=10, events=10,
                     locations="T1_US_FNAL_Disk")
    inputFile.create()

    inputFileset = Fileset(name="InputFileset")
    inputFileset.create()
    inputFileset.addFile(inputFile)
    inputFileset.commit()

    unmergedFileset = Fileset(name="UnmergedFileset")
    unmergedFileset.create()

    mergedFileset = Fileset(name="MergedFileset")
    mergedFileset.create()

    # Workflows and their outputs
    procWorkflow = Workflow(spec="wf001.xml", owner="Steve",
                            name="TestWF", task="/TestWF/None")
    procWorkflow.create()
    procWorkflow.addOutput("outputRECORECO", unmergedFileset)

    mergeWorkflow = Workflow(spec="wf002.xml", owner="Steve",
                             name="MergeWF", task="/MergeWF/None")
    mergeWorkflow.create()
    mergeWorkflow.addOutput("Merged", mergedFileset)

    insertWorkflow = self.dbsfactory(classname="InsertWorkflow")
    insertWorkflow.execute("TestWF", "/TestWF/None", 0, 0, 0, 0)
    insertWorkflow.execute("MergeWF", "/MergeWF/None", 0, 0, 0, 0)

    # Subscriptions
    self.procSubscription = Subscription(fileset=inputFileset,
                                         workflow=procWorkflow,
                                         split_algo="FileBased",
                                         type="Processing")
    self.procSubscription.create()
    self.procSubscription.acquireFiles()

    self.mergeSubscription = Subscription(fileset=unmergedFileset,
                                          workflow=mergeWorkflow,
                                          split_algo="WMBSMergeBySize",
                                          type="Merge")
    self.mergeSubscription.create()

    # Job groups and the test job
    self.procJobGroup = JobGroup(subscription=self.procSubscription)
    self.procJobGroup.create()
    self.mergeJobGroup = JobGroup(subscription=self.mergeSubscription)
    self.mergeJobGroup.create()

    self.testJob = Job(name="testJob", files=[inputFile])
    self.testJob.create(group=self.procJobGroup)
    self.testJob["state"] = "complete"

    # The WMBS DAO factory is rebuilt before the job actions are created.
    currentThread = threading.currentThread()
    self.daofactory = DAOFactory(package="WMCore.WMBS",
                                 logger=currentThread.logger,
                                 dbinterface=currentThread.dbi)
    self.stateChangeAction = self.daofactory(classname="Jobs.ChangeState")
    self.setFWJRAction = self.daofactory(classname="Jobs.SetFWJRPath")
    self.getJobTypeAction = self.daofactory(classname="Jobs.GetType")
    self.daofactory(classname="Locations.New").execute(siteName="cmssrm.fnal.gov")

    self.stateChangeAction.execute(jobs=[self.testJob])

    self.tempDir = tempfile.mkdtemp()
    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Install the test workload into WMBS through WMBSHelper and verify the
    processing, merge, cleanup and skim workflows (owner, spec, outputs),
    their output filesets, and the processing, merge and skim
    subscriptions including the site white/black list.
    """
    def expectedSpec(wmbsWorkflow):
        # Expected spec location of a workflow inside the work directory.
        return os.path.join(self.workDir, wmbsWorkflow.name,
                            "WMSandbox", "WMWorkload.pkl")

    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    helper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock",
                        cachepath=self.workDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(testTopLevelTask, helper.topLevelFileset)

    # Processing workflow
    procWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask")
    procWorkflow.load()

    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco",
                     "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec, expectedSpec(procWorkflow),
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    procOutput = procWorkflow.outputMap["OutputA"][0]
    mergedProcOutput = procOutput["merged_output_fileset"]
    unmergedProcOutput = procOutput["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()

    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec, expectedSpec(mergeWorkflow),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow
    cleanupWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()

    self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec, expectedSpec(cleanupWorkflow),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()

    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec, expectedSpec(skimWorkflow),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    skimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]
    mergedSkimOutputA = skimOutputA["merged_output_fileset"]
    unmergedSkimOutputA = skimOutputA["output_fileset"]
    skimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]
    mergedSkimOutputB = skimOutputB["merged_output_fileset"]
    unmergedSkimOutputB = skimOutputB["output_fileset"]

    for skimFileset in (mergedSkimOutputA, mergedSkimOutputB,
                        unmergedSkimOutputA, unmergedSkimOutputB):
        skimFileset.loadData()

    self.assertEqual(mergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Processing subscription and its site lists
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset,
                                    workflow=procWorkflow)
    procSubscription.loadData()

    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] != "site1":
            self.assertEqual(site["valid"], 0,
                             "Error: Site should be black listed.")
        else:
            self.assertEqual(site["valid"], 1,
                             "Error: Site should be white listed.")

    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscription
    mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                     workflow=mergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # Skim subscription
    skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                    workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    return
def testMask(self):
    """
    _testMask_

    Create a job whose mask holds two runs with two lumis each, reload the
    job from the database and verify that the mask round-trips and that
    filterRunLumisByMask() keeps only the runs and lumis present in the
    mask.
    """
    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    fileA = File(lfn=makeUUID(), locations="test.site.ch")
    fileB = File(lfn=makeUUID(), locations="test.site.ch")
    for wmbsFile in (fileA, fileB):
        wmbsFile.create()

    fileset.addFile([fileA, fileB])
    fileset.commit()

    subscription.acquireFiles([fileA, fileB])

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    # Build the mask before the job is written to the database.
    maskedJob = Job()
    for runNumber, lumis in ((100, [101, 102]), (200, [201, 202])):
        maskedJob['mask'].addRunAndLumis(run=runNumber, lumis=lumis)
    maskedJob.create(group=jobGroup)

    loadJob = Job(id=maskedJob.exists())
    loadJob.loadData()

    loadedMask = loadJob['mask']
    runs = loadedMask.getRunAndLumis()
    self.assertEqual(len(runs), 2)
    self.assertEqual(runs[100], [[101, 102]])
    self.assertEqual(runs[200], [[201, 202]])

    # Run 100 overlaps the mask only partially, run 300 not at all.
    bigRun = Run(100, 101, 102, 103, 104)
    badRun = Run(300, 1001, 1002)
    result = loadedMask.filterRunLumisByMask([bigRun, badRun])

    self.assertEqual(len(result), 1)
    alteredRun = result.pop()
    self.assertEqual(alteredRun.run, 100)
    self.assertEqual(alteredRun.lumis, [101, 102])

    # Two equal runs outside of the mask; the result is not inspected.
    run0 = Run(300, 1001, 1002)
    run1 = Run(300, 1001, 1002)
    loadedMask.filterRunLumisByMask([run0, run1])

    return
def setupForKillTest(self, baAPI=None):
    """
    _setupForKillTest_

    Inject a "Main" workflow into WMBS made of a processing, a merge and a
    cleanup task.  Every task gets its own subscription with files in
    different states and three jobs in different states.  An unrelated
    "Bogus" workflow with one acquired file is injected as well, so that
    callers can check it is not touched by the killing code.

    The subscriptions and jobs are stored on self (mainProcSub, procJobA,
    ..., bogusSub).  When baAPI is given, the jobs are additionally handed
    to its createNewJobs() method.
    """
    myThread = threading.currentThread()
    wmbsFactory = DAOFactory(package="WMCore.WMBS",
                             logger=myThread.logger,
                             dbinterface=myThread.dbi)
    # NOTE: locationAction is instantiated here but never executed below.
    locationAction = wmbsFactory(classname="Locations.New")
    changeStateAction = wmbsFactory(classname="Jobs.ChangeState")

    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1',
                               seName='goodse.cern.ch',
                               ceName='site1',
                               plugin="TestPlugin")
    resourceControl.insertThreshold(siteName='site1',
                                    taskType='Processing',
                                    maxSlots=10000,
                                    pendingSlots=10000)

    userDN = 'someDN'
    userAction = wmbsFactory(classname="Users.New")
    userAction.execute(dn=userDN, group_name='DEFAULT', role_name='DEFAULT')

    def fillFileset(fileset, lfns):
        """Create one file per LFN, add them all to fileset and commit it."""
        newFiles = [File(lfn, locations="goodse.cern.ch") for lfn in lfns]
        for newFile in newFiles:
            newFile.create()
        for newFile in newFiles:
            fileset.addFile(newFile)
        fileset.commit()
        return newFiles

    def makeJob(jobName, jobState):
        """Build (but do not create) a job at site1 in the given state."""
        newJob = Job(name=jobName)
        newJob["state"] = jobState
        newJob["location"] = "site1"
        return newJob

    inputFileset = Fileset("input")
    inputFileset.create()
    inputFileA, inputFileB, inputFileC = fillFileset(inputFileset, ("lfnA", "lfnB", "lfnC"))

    unmergedOutputFileset = Fileset("unmerged")
    unmergedOutputFileset.create()
    unmergedFileA, unmergedFileB, unmergedFileC = fillFileset(unmergedOutputFileset,
                                                              ("ulfnA", "ulfnB", "ulfnC"))

    mainProcWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Proc")
    mainProcWorkflow.create()
    mainProcMergeWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="ProcMerge")
    mainProcMergeWorkflow.create()
    mainCleanupWorkflow = Workflow(spec="spec1", owner="Steve", name="Main", task="Cleanup")
    mainCleanupWorkflow.create()

    # Processing task: file A acquired, file B completed.
    self.mainProcSub = Subscription(fileset=inputFileset,
                                    workflow=mainProcWorkflow,
                                    type="Processing")
    self.mainProcSub.create()
    self.mainProcSub.acquireFiles(inputFileA)
    self.mainProcSub.completeFiles(inputFileB)

    procJobGroup = JobGroup(subscription=self.mainProcSub)
    procJobGroup.create()

    self.procJobA = makeJob("ProcJobA", "new")
    self.procJobB = makeJob("ProcJobB", "executing")
    self.procJobC = makeJob("ProcJobC", "complete")
    for procJob in (self.procJobA, self.procJobB, self.procJobC):
        procJob.create(procJobGroup)

    # Merge task: file A acquired, file B failed.
    self.mainMergeSub = Subscription(fileset=unmergedOutputFileset,
                                     workflow=mainProcMergeWorkflow,
                                     type="Merge")
    self.mainMergeSub.create()
    self.mainMergeSub.acquireFiles(unmergedFileA)
    self.mainMergeSub.failFiles(unmergedFileB)

    mergeJobGroup = JobGroup(subscription=self.mainMergeSub)
    mergeJobGroup.create()

    self.mergeJobA = makeJob("MergeJobA", "exhausted")
    self.mergeJobB = makeJob("MergeJobB", "cleanout")
    self.mergeJobC = makeJob("MergeJobC", "new")
    for mergeJob in (self.mergeJobA, self.mergeJobB, self.mergeJobC):
        mergeJob.create(mergeJobGroup)

    # Cleanup task: file A acquired, file B completed.
    self.mainCleanupSub = Subscription(fileset=unmergedOutputFileset,
                                       workflow=mainCleanupWorkflow,
                                       type="Cleanup")
    self.mainCleanupSub.create()
    self.mainCleanupSub.acquireFiles(unmergedFileA)
    self.mainCleanupSub.completeFiles(unmergedFileB)

    cleanupJobGroup = JobGroup(subscription=self.mainCleanupSub)
    cleanupJobGroup.create()

    self.cleanupJobA = makeJob("CleanupJobA", "new")
    self.cleanupJobB = makeJob("CleanupJobB", "executing")
    self.cleanupJobC = makeJob("CleanupJobC", "complete")
    for cleanupJob in (self.cleanupJobA, self.cleanupJobB, self.cleanupJobC):
        cleanupJob.create(cleanupJobGroup)

    # Push the state set on each job object into the database.
    jobList = [self.procJobA, self.procJobB, self.procJobC,
               self.mergeJobA, self.mergeJobB, self.mergeJobC,
               self.cleanupJobA, self.cleanupJobB, self.cleanupJobC]
    changeStateAction.execute(jobList)

    if baAPI:
        for job in jobList:
            job['plugin'] = 'TestPlugin'
            job['userdn'] = userDN
            job['usergroup'] = 'DEFAULT'
            job['userrole'] = 'DEFAULT'
            job['custom']['location'] = 'site1'
        baAPI.createNewJobs(wmbsJobs=jobList)

    # We'll create an unrelated workflow to verify that it isn't affected
    # by the killing code.
    bogusFileset = Fileset("dontkillme")
    bogusFileset.create()

    bogusFileA = File("bogus/lfnA", locations="goodse.cern.ch")
    bogusFileA.create()
    bogusFileset.addFile(bogusFileA)
    bogusFileset.commit()

    bogusWorkflow = Workflow(spec="spec2", owner="Steve", name="Bogus", task="Proc")
    bogusWorkflow.create()

    self.bogusSub = Subscription(fileset=bogusFileset,
                                 workflow=bogusWorkflow,
                                 type="Processing")
    self.bogusSub.create()
    self.bogusSub.acquireFiles(bogusFileA)

    return
def test_AutoIncrementCheck(self):
    """
    _AutoIncrementCheck_

    Run the Jobs.AutoIncrementCheck DAO before creating each of four jobs
    and check the ID every new job receives.  The test returns immediately
    unless the database dialect is MySQL.
    """
    if threading.currentThread().dialect.lower() != 'mysql':
        return

    workflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.create()

    fileA, fileB = [File(lfn=makeUUID(), locations="test.site.ch") for _ in range(2)]
    for wmbsFile in (fileA, fileB):
        wmbsFile.create()
    fileset.addFile([fileA, fileB])
    fileset.commit()
    subscription.acquireFiles([fileA, fileB])

    jobGroup = JobGroup(subscription=subscription)
    jobGroup.create()

    incrementDAO = self.daoFactory(classname="Jobs.AutoIncrementCheck")

    # Each entry: keyword arguments for the DAO call, then the ID expected
    # for the job created right after it.
    scenarios = (
        ({}, 1),
        ({}, 2),
        ({"input": 10}, 11),
        ({"input": 5}, 12),
    )
    for daoArguments, expectedJobID in scenarios:
        incrementDAO.execute(**daoArguments)
        newJob = Job()
        newJob.create(group=jobGroup)
        self.assertEqual(newJob.exists(), expectedJobID)

    return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Build a ReReco workload (data processing with a skim attached) in which
    the RECO output module is transient, so the skim runs on the unmerged
    RECO output.  Install it into WMBS and check the output filesets of the
    skim and skim-merge workflows as well as the type and split algorithm
    of the skim, merge, cleanup and log-collect subscriptions.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()

    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments.update({
        "ProcessingString": "ProcString",
        "ConfigCacheID": recoConfig,
        "SkimName1": "SomeSkim",
        "SkimInput1": "RECOoutput",
        "Skim1ConfigCacheID": skimConfig,
        "CouchURL": os.environ["COUCHURL"],
        "CouchDBName": "rereco_t",
        "TransientOutputModules": ["RECOoutput"],
        "EnableHarvesting": True,
        "DQMConfigCacheID": self.injectDQMHarvestConfig(),
    })

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    skimTask = testWorkload.data.tasks.DataProcessing.tree.children.SomeSkim
    skimBMergeTask = skimTask.tree.children.SomeSkimMergeSkimB
    self.assertEqual(skimBMergeTask.steps.cmsRun1.output.modules.Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # --- Skim workflow outputs -------------------------------------------
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        skimOutputEntry = skimWorkflow.outputMap[goldenOutputMod][0]
        mergedOutput = skimOutputEntry["merged_output_fileset"]
        unmergedOutput = skimOutputEntry["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    skimLogEntry = skimWorkflow.outputMap["logArchive"][0]
    logArchOutput = skimLogEntry["merged_output_fileset"]
    unmergedLogArchOutput = skimLogEntry["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    # Both logArchive entries are expected to name the same fileset.
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Skim merge workflow outputs -------------------------------------
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedEntry = mergeWorkflow.outputMap["Merged"][0]
        mergedMergeOutput = mergedEntry["merged_output_fileset"]
        unmergedMergeOutput = mergedEntry["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        mergeLogEntry = mergeWorkflow.outputMap["logArchive"][0]
        logArchOutput = mergeLogEntry["merged_output_fileset"]
        unmergedLogArchOutput = mergeLogEntry["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Skim subscription on the unmerged RECO output --------------------
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions ----------------------------------------------
    for skimOutput in ("A", "B"):
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # --- Cleanup subscriptions --------------------------------------------
    for skimOutput in ("A", "B"):
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- Log collect subscriptions ----------------------------------------
    for skimOutput in ("A", "B"):
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (
                skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testGetOutputParentLFNs(self):
    """
    _testGetOutputParentLFNs_

    Verify that the getOutputDBSParentLFNs() method returns the correct
    parent LFNs.

    Job A runs over two merged files and is expected to report those files
    themselves.  Job B runs over two unmerged files (C and D) whose parents
    are the merged files E and F, and is expected to report E and F.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Simon", name="wf001", task="Test")
    testWorkflow.create()

    testWMBSFileset = Fileset(name="TestFileset")
    testWMBSFileset.create()

    testSubscription = Subscription(fileset=testWMBSFileset, workflow=testWorkflow)
    testSubscription.create()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10, merged=True)
    testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10, merged=True)
    testFileC = File(lfn="/this/is/a/lfnC", size=1024, events=10, merged=False)
    testFileD = File(lfn="/this/is/a/lfnD", size=1024, events=10, merged=False)
    testFileE = File(lfn="/this/is/a/lfnE", size=1024, events=10, merged=True)
    testFileF = File(lfn="/this/is/a/lfnF", size=1024, events=10, merged=True)
    for testFile in (testFileA, testFileB, testFileC, testFileD, testFileE, testFileF):
        testFile.create()

    # E and F are the merged parents of the unmerged files C and D.
    testFileE.addChild(testFileC["lfn"])
    testFileF.addChild(testFileD["lfn"])

    testJobA = Job(name="TestJob", files=[testFileA, testFileB])
    testJobA["couch_record"] = "somecouchrecord"
    testJobA["location"] = "test.site.ch"
    testJobA.create(group=testJobGroup)
    testJobA.associateFiles()

    testJobB = Job(name="TestJobB", files=[testFileC, testFileD])
    testJobB["couch_record"] = "somecouchrecord"
    testJobB["location"] = "test.site.ch"
    testJobB.create(group=testJobGroup)
    testJobB.associateFiles()

    expectations = (
        (testJobA, ["/this/is/a/lfnA", "/this/is/a/lfnB"]),
        (testJobB, ["/this/is/a/lfnE", "/this/is/a/lfnF"]),
    )
    for testJob, goldenLFNs in expectations:
        # Every returned LFN must be expected exactly once: it is removed
        # from the golden list when seen, so a duplicate fails assertIn.
        for parentLFN in testJob.getOutputDBSParentLFNs():
            self.assertIn(parentLFN, goldenLFNs,
                          "ERROR: Unknown lfn: %s" % parentLFN)
            goldenLFNs.remove(parentLFN)

        # unittest assertions are used instead of bare assert statements so
        # that the checks still run when Python is started with -O.
        self.assertEqual(len(goldenLFNs), 0,
                         "ERROR: LFNs are missing: %s" % goldenLFNs)

    return
def setUp(self):
    """
    _setUp_

    Set up the WMBS schema, register location "site1" and create five
    filesets, each with a RunBased "Processing" subscription:

      - multipleFileFileset:   10 files, file i in run i with lumi 45 + i
      - singleFileFileset:     one file in run 1, lumi 45
      - multipleFileRunset:    10 files spread over runs 0-3 (three files
                               per run, one in the last), lumi 45
      - singleRunFileset:      10 files, all in run 1, lumi 45
      - singleRunMultipleLumi: 10 files, all in run 1, lumi 45 + i
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["WMCore.WMBS"], useDefault=False)

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)

    locationAction = daofactory(classname="Locations.New")
    locationAction.execute(siteName="site1", pnn="T2_CH_CERN")

    self.multipleFileFileset = Fileset(name="TestFileset1")
    self.multipleFileFileset.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN")
        newFile.addRun(Run(i, 45 + i))
        newFile.create()
        self.multipleFileFileset.addFile(newFile)
    self.multipleFileFileset.commit()

    self.singleFileFileset = Fileset(name="TestFileset2")
    self.singleFileFileset.create()
    newFile = File("/some/file/name", size=1000, events=100, locations="T2_CH_CERN")
    newFile.addRun(Run(1, 45))
    newFile.create()
    self.singleFileFileset.addFile(newFile)
    self.singleFileFileset.commit()

    self.multipleFileRunset = Fileset(name="TestFileset3")
    self.multipleFileRunset.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN")
        # Floor division keeps the run number an integer on Python 3 as
        # well; plain "/" would produce runs such as 0.333.
        newFile.addRun(Run(i // 3, 45))
        newFile.create()
        self.multipleFileRunset.addFile(newFile)
    self.multipleFileRunset.commit()

    self.singleRunFileset = Fileset(name="TestFileset4")
    self.singleRunFileset.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN")
        newFile.addRun(Run(1, 45))
        newFile.create()
        self.singleRunFileset.addFile(newFile)
    self.singleRunFileset.commit()

    self.singleRunMultipleLumi = Fileset(name="TestFileset5")
    self.singleRunMultipleLumi.create()
    for i in range(10):
        newFile = File(makeUUID(), size=1000, events=100, locations="T2_CH_CERN")
        newFile.addRun(Run(1, 45 + i))
        newFile.create()
        self.singleRunMultipleLumi.addFile(newFile)
    self.singleRunMultipleLumi.commit()

    testWorkflow = Workflow(spec="spec.xml", owner="mnorman", name="wf001", task="Test")
    testWorkflow.create()

    self.multipleFileSubscription = Subscription(fileset=self.multipleFileFileset,
                                                 workflow=testWorkflow,
                                                 split_algo="RunBased",
                                                 type="Processing")
    self.singleFileSubscription = Subscription(fileset=self.singleFileFileset,
                                               workflow=testWorkflow,
                                               split_algo="RunBased",
                                               type="Processing")
    self.multipleRunSubscription = Subscription(fileset=self.multipleFileRunset,
                                                workflow=testWorkflow,
                                                split_algo="RunBased",
                                                type="Processing")
    self.singleRunSubscription = Subscription(fileset=self.singleRunFileset,
                                              workflow=testWorkflow,
                                              split_algo="RunBased",
                                              type="Processing")
    self.singleRunMultipleLumiSubscription = Subscription(fileset=self.singleRunMultipleLumi,
                                                          workflow=testWorkflow,
                                                          split_algo="RunBased",
                                                          type="Processing")

    self.multipleFileSubscription.create()
    self.singleFileSubscription.create()
    self.multipleRunSubscription.create()
    self.singleRunSubscription.create()
    self.singleRunMultipleLumiSubscription.create()

    return
def testFailJobInput(self):
    """
    _testFailJobInput_

    Exercise failInputFiles() on jobs of one subscription and check that a
    second subscription, which has acquired the very same files, keeps them
    in the acquired state.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve", name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset, workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    # The same three files go into both filesets.
    sharedFiles = [File(lfn=makeUUID(), locations="T2_CH_CERN") for _ in range(3)]
    for sharedFile in sharedFiles:
        sharedFile.create()

    testFileset.addFile(list(sharedFiles))
    bogusFileset.addFile(list(sharedFiles))
    testFileset.commit()
    bogusFileset.commit()

    testSubscription.completeFiles(list(sharedFiles))
    bogusSubscription.acquireFiles(list(sharedFiles))

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    testJobA = Job(name="TestJobA", files=list(sharedFiles))
    testJobB = Job(name="TestJobB", files=list(sharedFiles))
    bogusJob = Job(name="BogusJob", files=list(sharedFiles))
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    def checkStatusCounts(subscription, available, acquired, failed, completed):
        """Assert how many files the subscription holds in each status."""
        expectedCounts = (("Available", available),
                          ("Acquired", acquired),
                          ("Failed", failed),
                          ("Completed", completed))
        for status, expected in expectedCounts:
            self.assertEqual(len(subscription.filesOfStatus(status)), expected)

    testJobA.failInputFiles()
    testJobB.failInputFiles()

    checkStatusCounts(testSubscription, available=0, acquired=0, failed=3, completed=0)

    changeStateAction = self.daoFactory(classname="Jobs.ChangeState")
    testJobB["state"] = "cleanout"
    changeStateAction.execute([testJobB])

    # Try again
    testJobA.failInputFiles()

    # Should now be failed
    checkStatusCounts(testSubscription, available=0, acquired=0, failed=3, completed=0)

    # bogus should be unchanged
    checkStatusCounts(bogusSubscription, available=0, acquired=3, failed=0, completed=0)

    return
def testStoreResults(self):
    """
    _testStoreResults_

    Build a StoreResults workload, install it into WMBS and check the
    workflow's output filesets together with the type and split algorithm
    of its top level subscription.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()

    workloadFactory = StoreResultsWorkloadFactory()
    testWorkload = workloadFactory.factoryWorkloadConstruction("TestWorkload", arguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Output map keys are the output module name followed by the data tier.
    goldenOutputMods = {"Merged": "USER"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        outputEntry = testWorkflow.outputMap[fset][0]
        mergedOutput = outputEntry["merged_output_fileset"]
        unmergedOutput = outputEntry["output_fileset"]

        mergedOutput.loadData()
        unmergedOutput.loadData()

        # Both entries are expected to name the merged fileset.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % fset,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchiveEntry = testWorkflow.outputMap["logArchive"][0]
    logArchOutput = logArchiveEntry["merged_output_fileset"]
    unmergedLogArchOutput = logArchiveEntry["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()

    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()

    self.assertEqual(procSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    return
def testCompleteJobInput(self):
    """
    _testCompleteJobInput_

    Verify the correct output of the CompleteInput DAO.  This should mark
    the input for a job as complete once all the jobs that run over a
    particular file have completed successfully.

    A second ("bogus") subscription acquires the same files and must not be
    affected by anything done to the test subscription.
    """
    testWorkflow = Workflow(spec="spec.xml", owner="Steve", name="wf001", task="Test")
    bogusWorkflow = Workflow(spec="spec1.xml", owner="Steve", name="wf002", task="Test")
    testWorkflow.create()
    bogusWorkflow.create()

    testFileset = Fileset(name="TestFileset")
    bogusFileset = Fileset(name="BogusFileset")
    testFileset.create()
    bogusFileset.create()

    testSubscription = Subscription(fileset=testFileset, workflow=testWorkflow)
    bogusSubscription = Subscription(fileset=bogusFileset, workflow=bogusWorkflow)
    testSubscription.create()
    bogusSubscription.create()

    testFileA = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileB = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileC = File(lfn=makeUUID(), locations="T2_CH_CERN")
    testFileA.create()
    testFileB.create()
    testFileC.create()

    testFileset.addFile([testFileA, testFileB, testFileC])
    bogusFileset.addFile([testFileA, testFileB, testFileC])
    testFileset.commit()
    bogusFileset.commit()

    testSubscription.acquireFiles([testFileA, testFileB, testFileC])
    bogusSubscription.acquireFiles([testFileA, testFileB, testFileC])

    testJobGroup = JobGroup(subscription=testSubscription)
    bogusJobGroup = JobGroup(subscription=bogusSubscription)
    testJobGroup.create()
    bogusJobGroup.create()

    # File A is shared by jobs A and B; file B belongs to job B only and
    # file C to job C only.
    testJobA = Job(name="TestJobA", files=[testFileA])
    testJobB = Job(name="TestJobB", files=[testFileA, testFileB])
    testJobC = Job(name="TestJobC", files=[testFileC])
    bogusJob = Job(name="BogusJob", files=[testFileA, testFileB, testFileC])
    testJobA.create(group=testJobGroup)
    testJobB.create(group=testJobGroup)
    testJobC.create(group=testJobGroup)
    bogusJob.create(group=bogusJobGroup)

    testJobA["outcome"] = "success"
    testJobB["outcome"] = "failure"
    testJobC["outcome"] = "success"
    testJobA.save()
    testJobB.save()
    testJobC.save()

    def checkStatusCounts(subscription, subLabel, expectedCounts):
        """Assert the number of files per status for one subscription."""
        for status, fileLabel, expected in expectedCounts:
            found = len(subscription.filesOfStatus(status))
            # unittest assertions (not bare asserts) so the checks are not
            # stripped when Python runs with -O.
            self.assertEqual(found, expected,
                             "Error: %s sub has wrong number of %s files: %s"
                             % (subLabel, fileLabel, found))

    # Job B also runs over file A and has not succeeded yet, so nothing
    # may be marked complete at this point.
    testJobA.completeInputFiles()
    checkStatusCounts(testSubscription, "test", [("Completed", "complete", 0)])

    testJobB["outcome"] = "success"
    testJobB.save()
    testJobB.completeInputFiles(skipFiles=[testFileB["lfn"]])

    checkStatusCounts(testSubscription, "test",
                      [("Available", "available", 0),
                       ("Acquired", "acquired", 1),
                       ("Completed", "complete", 1),
                       ("Failed", "failed", 1)])

    # The bogus subscription must still hold all three files as acquired.
    checkStatusCounts(bogusSubscription, "bogus",
                      [("Available", "available", 0),
                       ("Acquired", "acquired", 3),
                       ("Completed", "complete", 0),
                       ("Failed", "failed", 0)])

    return
def setUp(self):
    """
    _setUp_

    Install the T0.WMBS schema, insert one location with two SE names, one
    run with four lumi sections and one stream bound to "TestFileset1",
    then create a Repack subscription on that fileset.  The default split
    parameters are stored in self.splitArgs.
    """
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setDatabaseConnection()
    self.testInit.setSchema(customModules=["T0.WMBS"])

    self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

    myThread = threading.currentThread()
    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    locationStatements = (
        """INSERT INTO wmbs_location (id, site_name, state) VALUES (1, 'SomeSite', 1) """,
        """INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE') """,
        """INSERT INTO wmbs_location_senames (location, se_name) VALUES (1, 'SomeSE2') """,
    )
    for statement in locationStatements:
        myThread.dbi.processData(statement, transaction=False)

    insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
    runBinds = {'RUN': 1,
                'TIME': int(time.time()),
                'HLTKEY': "someHLTKey"}
    insertRunDAO.execute(binds=runBinds, transaction=False)

    insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
    for lumi in range(1, 5):
        insertLumiDAO.execute(binds={'RUN': 1, 'LUMI': lumi}, transaction=False)

    insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
    insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

    insertStreamFilesetDAO = daoFactory(classname="RunConfig.InsertStreamFileset")
    insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

    # The fileset is loaded, not created, before the subscription is built.
    self.fileset1 = Fileset(name="TestFileset1")
    self.fileset1.load()

    workflow1 = Workflow(spec="spec.xml", owner="hufnagel", name="TestWorkflow1", task="Test")
    workflow1.create()

    self.subscription1 = Subscription(fileset=self.fileset1,
                                      workflow=workflow1,
                                      split_algo="Repack",
                                      type="Repack")
    self.subscription1.create()

    # keep for later
    self.insertClosedLumiDAO = daoFactory(classname="RunLumiCloseout.InsertClosedLumi")
    self.currentTime = int(time.time())

    # default split parameters
    self.splitArgs = {
        'maxSizeSingleLumi': 20 * 1024 * 1024 * 1024,
        'maxSizeMultiLumi': 10 * 1024 * 1024 * 1024,
        'maxInputEvents': 500000,
        'maxInputFiles': 1000,
    }

    return
def testGetOutputMapDAO(self):
    """
    _testGetOutputMapDAO_

    Verify the proper behavior of the GetOutputMapDAO for a variety of
    different processing chains.

    The primary workflow has three output modules.  Each one is registered
    twice: once with an output/merged-output fileset pair and once with the
    cleanup fileset and no merged fileset.  The DAO is run for a job of the
    primary workflow and every returned entry is checked against those
    registrations.
    """
    recoOutputFileset = Fileset(name="RECO")
    recoOutputFileset.create()
    mergedRecoOutputFileset = Fileset(name="MergedRECO")
    mergedRecoOutputFileset.create()
    alcaOutputFileset = Fileset(name="ALCA")
    alcaOutputFileset.create()
    mergedAlcaOutputFileset = Fileset(name="MergedALCA")
    mergedAlcaOutputFileset.create()
    dqmOutputFileset = Fileset(name="DQM")
    dqmOutputFileset.create()
    mergedDqmOutputFileset = Fileset(name="MergedDQM")
    mergedDqmOutputFileset.create()
    cleanupFileset = Fileset(name="Cleanup")
    cleanupFileset.create()

    testWorkflow = Workflow(spec="wf001.xml", owner="Steve", name="TestWF", task="None")
    testWorkflow.create()
    testWorkflow.addOutput("output", recoOutputFileset, mergedRecoOutputFileset)
    testWorkflow.addOutput("ALCARECOStreamCombined", alcaOutputFileset, mergedAlcaOutputFileset)
    testWorkflow.addOutput("DQM", dqmOutputFileset, mergedDqmOutputFileset)
    # Second registration per module: cleanup fileset, no merged fileset.
    testWorkflow.addOutput("output", cleanupFileset)
    testWorkflow.addOutput("ALCARECOStreamCombined", cleanupFileset)
    testWorkflow.addOutput("DQM", cleanupFileset)

    testRecoMergeWorkflow = Workflow(spec="wf002.xml", owner="Steve",
                                     name="TestRecoMergeWF", task="None")
    testRecoMergeWorkflow.create()
    testRecoMergeWorkflow.addOutput("anything", mergedRecoOutputFileset,
                                    mergedRecoOutputFileset)

    testRecoProcWorkflow = Workflow(spec="wf004.xml", owner="Steve",
                                    name="TestRecoProcWF", task="None")
    testRecoProcWorkflow.create()

    testAlcaChildWorkflow = Workflow(spec="wf003.xml", owner="Steve",
                                     name="TestAlcaChildWF", task="None")
    testAlcaChildWorkflow.create()

    inputFile = File(lfn="/path/to/some/lfn", size=600000, events=60000,
                     locations="cmssrm.fnal.gov")
    inputFile.create()

    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testFileset.addFile(inputFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="EventBased",
                                    type="Processing")
    testMergeRecoSubscription = Subscription(fileset=recoOutputFileset,
                                             workflow=testRecoMergeWorkflow,
                                             split_algo="WMBSMergeBySize",
                                             type="Merge")
    testProcRecoSubscription = Subscription(fileset=recoOutputFileset,
                                            workflow=testRecoProcWorkflow,
                                            split_algo="FileBased",
                                            type="Processing")
    testChildAlcaSubscription = Subscription(fileset=alcaOutputFileset,
                                             workflow=testAlcaChildWorkflow,
                                             split_algo="FileBased",
                                             type="Processing")
    testSubscription.create()
    testMergeRecoSubscription.create()
    testProcRecoSubscription.create()
    testChildAlcaSubscription.create()
    testSubscription.acquireFiles()

    testJobGroup = JobGroup(subscription=testSubscription)
    testJobGroup.create()

    testJob = Job(name="SplitJobA", files=[inputFile])
    testJob.create(group=testJobGroup)
    testJob["state"] = "complete"
    testJob.save()

    outputMapAction = self.daoFactory(classname="Jobs.GetOutputMap")
    outputMap = outputMapAction.execute(jobID=testJob["id"])

    # A unittest assertion (not a bare assert) so the check still runs
    # under "python -O".
    self.assertEqual(len(outputMap), 3,
                     "Error: Wrong number of outputs for primary workflow.")

    goldenMap = {
        "output": (recoOutputFileset.id, mergedRecoOutputFileset.id),
        "ALCARECOStreamCombined": (alcaOutputFileset.id, mergedAlcaOutputFileset.id),
        "DQM": (dqmOutputFileset.id, mergedDqmOutputFileset.id),
    }

    for outputID in outputMap:
        for outputFilesets in outputMap[outputID]:
            # Entries without a merged fileset are the cleanup registrations.
            if outputFilesets["merged_output_fileset"] is None:
                self.assertEqual(outputFilesets["output_fileset"],
                                 cleanupFileset.id,
                                 "Error: Cleanup fileset is wrong.")
                continue

            self.assertIn(outputID, goldenMap,
                          "Error: Output identifier is missing.")
            self.assertEqual(outputFilesets["output_fileset"],
                             goldenMap[outputID][0],
                             "Error: Output fileset is wrong.")
            self.assertEqual(outputFilesets["merged_output_fileset"],
                             goldenMap[outputID][1],
                             "Error: Merged output fileset is wrong.")

            # Removing the entry makes a second non-cleanup entry for the
            # same module fail the assertIn above.
            del goldenMap[outputID]

    self.assertEqual(len(goldenMap), 0,
                     "Error: Missing output maps.")

    return