def testStoreResults(self):
    """
    _testStoreResults_

    Create a StoreResults workflow and verify it installs into WMBS
    correctly: one merged output fileset, a logArchive fileset, and a
    Merge-type top-level subscription.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()
    arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})
    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    # One golden output module plus the logArchive entry.
    # (len(dict) instead of len(dict.keys()) — same value, no temp view.)
    self.assertEqual(len(testWorkflow.outputMap), 2,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["Merged"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # StoreResults has no separate merge step, so both the merged and
        # the "unmerged" fileset resolve to the merged fileset name.
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchOutput = testWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")
    return
def testStoreResults(self):
    """
    _testStoreResults_

    Build a StoreResults workload, install it into WMBS, and check the
    resulting workflow outputs, filesets and top-level subscription.
    """
    arguments = StoreResultsWorkloadFactory.getTestArguments()
    factory = StoreResultsWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    testWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/StoreResults")
    testWorkflow.load()

    # Golden output module plus logArchive.
    self.assertEqual(len(testWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.")

    # Output module name -> data tier; the fileset key is their concatenation.
    goldenOutputMods = {"Merged": "USER"}
    for outName, dataTier in viewitems(goldenOutputMods):
        filesetName = outName + dataTier
        outputEntry = testWorkflow.outputMap[filesetName][0]
        mergedOutput = outputEntry["merged_output_fileset"]
        unmergedOutput = outputEntry["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        expectedName = "/TestWorkload/StoreResults/merged-%s" % filesetName
        # StoreResults has no merge step: both filesets carry the merged name.
        self.assertEqual(mergedOutput.name, expectedName,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, expectedName,
                         "Error: Unmerged output fileset is wrong: %s." % unmergedOutput.name)

    logArchEntry = testWorkflow.outputMap["logArchive"][0]
    logArchOutput = logArchEntry["merged_output_fileset"]
    unmergedLogArchOutput = logArchEntry["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/StoreResults/merged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=testWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Merge", "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo.")
    return
def createAndUploadPublish(self, jobList):
    """
    Collect the unique (workflow, task) pairs found in jobList and upload a
    publish workflow for each one.

    :param jobList: iterable of job dicts carrying at least the keys
                    'workflow', 'task' and 'cache_dir'.

    Upload failures are logged and skipped so that one bad workflow does
    not abort the remaining uploads.
    """
    # De-duplicate by (workflow, task); the cache_dir value is kept for
    # reference but only the keys drive the uploads below.
    taskList = {}
    for job in jobList:
        taskList[(job['workflow'], job['task'])] = job['cache_dir']

    for (workflow, task) in taskList.keys():
        wf = Workflow(name=workflow, task=task)
        wf.load()
        try:
            uploadPublishWorkflow(wf, ufcEndpoint=self.userFileCacheURL,
                                  workDir=self.uploadPublishDir)
        except Exception as ex:  # fixed: "except Exception, ex" is Python-2-only syntax
            # Lazy %-args: the message is only formatted if the record is emitted.
            logging.error('Upload failed for workflow, task: %s, %s', workflow, task)
            logging.error(str(ex))
def createAndUploadPublish(self, jobList):
    """
    Upload a publish workflow for every distinct (workflow, task) pair
    present in jobList.  Errors from individual uploads are logged and
    the loop continues with the next pair.
    """
    # De-duplicate jobs by (workflow, task); keeps the last cache_dir seen,
    # exactly as the incremental dict build would.
    taskList = {(job['workflow'], job['task']): job['cache_dir'] for job in jobList}

    for workflow, task in taskList.keys():
        wf = Workflow(name=workflow, task=task)
        wf.load()
        try:
            uploadPublishWorkflow(self.config, wf,
                                  ufcEndpoint=self.userFileCacheURL,
                                  workDir=self.uploadPublishDir)
        except Exception as ex:
            logging.error('Upload failed for workflow, task: %s, %s' % (workflow, task))
            logging.error(str(ex))
    return
def testLoad(self):
    """
    _testLoad_

    Create a workflow and then try to load it from the database using the
    following load methods:
      Workflow.LoadFromName
      Workflow.LoadFromID
      Workflow.LoadFromSpecOwner
    """
    testWorkflowA = Workflow(spec="spec.xml", owner="Simon", name="wf001",
                             task="Test", wfType="ReReco", priority=1000)
    testWorkflowA.create()

    def assertSameWorkflow(loaded, loadMethod):
        # Compare every persisted attribute of the loaded workflow against
        # the one we created.  Fixes a copy-paste bug in the original: the
        # LoadFromID / LoadFromSpecOwner checks compared A.priority against
        # B.priority instead of the freshly-loaded workflow's priority.
        self.assertTrue((testWorkflowA.id == loaded.id) and
                        (testWorkflowA.name == loaded.name) and
                        (testWorkflowA.spec == loaded.spec) and
                        (testWorkflowA.task == loaded.task) and
                        (testWorkflowA.wfType == loaded.wfType) and
                        (testWorkflowA.owner == loaded.owner) and
                        (testWorkflowA.priority == loaded.priority),
                        "ERROR: %s Failed" % loadMethod)

    testWorkflowB = Workflow(name="wf001", task="Test")
    testWorkflowB.load()
    assertSameWorkflow(testWorkflowB, "Workflow.LoadFromName")

    testWorkflowC = Workflow(id=testWorkflowA.id)
    testWorkflowC.load()
    assertSameWorkflow(testWorkflowC, "Workflow.LoadFromID")

    testWorkflowD = Workflow(spec="spec.xml", owner="Simon", task="Test")
    testWorkflowD.load()
    assertSameWorkflow(testWorkflowD, "Workflow.LoadFromSpecOwner")

    testWorkflowA.delete()
    return
def testLoad(self):
    """
    _testLoad_

    Create a workflow and then try to load it from the database using the
    following load methods:
      Workflow.LoadFromName
      Workflow.LoadFromID
      Workflow.LoadFromSpecOwner
    """
    testWorkflowA = Workflow(spec="spec.xml", owner="Simon", name="wf001",
                             task='Test', wfType="ReReco", priority=1000)
    testWorkflowA.create()

    testWorkflowB = Workflow(name="wf001", task='Test')
    testWorkflowB.load()
    self.assertTrue((testWorkflowA.id == testWorkflowB.id) and
                    (testWorkflowA.name == testWorkflowB.name) and
                    (testWorkflowA.spec == testWorkflowB.spec) and
                    (testWorkflowA.task == testWorkflowB.task) and
                    (testWorkflowA.wfType == testWorkflowB.wfType) and
                    (testWorkflowA.owner == testWorkflowB.owner) and
                    (testWorkflowA.priority == testWorkflowB.priority),
                    "ERROR: Workflow.LoadFromName Failed")

    testWorkflowC = Workflow(id=testWorkflowA.id)
    testWorkflowC.load()
    # Bug fix: compare against C's priority (the original copy-pasted
    # testWorkflowB.priority here, so the priority round-trip via
    # LoadFromID was never actually checked).
    self.assertTrue((testWorkflowA.id == testWorkflowC.id) and
                    (testWorkflowA.name == testWorkflowC.name) and
                    (testWorkflowA.spec == testWorkflowC.spec) and
                    (testWorkflowA.task == testWorkflowC.task) and
                    (testWorkflowA.wfType == testWorkflowC.wfType) and
                    (testWorkflowA.owner == testWorkflowC.owner) and
                    (testWorkflowA.priority == testWorkflowC.priority),
                    "ERROR: Workflow.LoadFromID Failed")

    testWorkflowD = Workflow(spec="spec.xml", owner="Simon", task='Test')
    testWorkflowD.load()
    # Bug fix: same copy-paste issue — compare against D's priority.
    self.assertTrue((testWorkflowA.id == testWorkflowD.id) and
                    (testWorkflowA.name == testWorkflowD.name) and
                    (testWorkflowA.spec == testWorkflowD.spec) and
                    (testWorkflowA.task == testWorkflowD.task) and
                    (testWorkflowA.wfType == testWorkflowD.wfType) and
                    (testWorkflowA.owner == testWorkflowD.owner) and
                    (testWorkflowA.priority == testWorkflowD.priority),
                    "ERROR: Workflow.LoadFromSpecOwner Failed")

    testWorkflowA.delete()
    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: check the
    processing, merge, cleanup and skim workflows, their output filesets,
    and the site white/black lists on the resulting subscriptions.
    """
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath=self.workDir)
    testWMBSHelper.createSubscription()

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask")
    procWorkflow.load()
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec,
                     os.path.join(self.workDir, procWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    # len(dict) rather than len(dict.keys()) — same count, no view object.
    self.assertEqual(len(procWorkflow.outputMap), 1, "Error: Wrong number of WF outputs.")

    mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap), 1, "Error: Wrong number of WF outputs.")

    cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec,
                     os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    # Cleanup tasks produce no output modules.
    self.assertEqual(len(cleanupWorkflow.outputMap), 0, "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name, "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()
    # Skim outputs are not merged, so merged and unmerged filesets match.
    self.assertEqual(mergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] == "site1":
            self.assertEqual(site["valid"], 1, "Error: Site should be white listed.")
        else:
            self.assertEqual(site["valid"], 0, "Error: Site should be black listed.")
    self.assertEqual(procSubscription["type"], "Processing", "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased", "Error: Wrong split algo.")

    mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    # Merge subscriptions inherit no site restrictions.
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim", "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased", "Error: Wrong split algo.")
    return
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workflow and verify it installs into WMBS
    correctly: output datasets, processing/merge/cleanup/logcollect
    workflows, their filesets and subscription types.
    """
    arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"
    arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

    factory = MonteCarloFromGENWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    outputDatasets = testWorkload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 2)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets)

    productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
    splitting = productionTask.jobSplittingParameters()
    self.assertFalse(splitting["deterministicPileup"])

    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()
    # Two output modules plus logArchive.
    # (len(dict) instead of len(dict.keys()) — same count, no view object.)
    self.assertEqual(len(procWorkflow.outputMap), 3, "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'production')

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # For merge tasks both filesets resolve to the merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    topLevelFileset = Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    unmergedReco = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    unmergedAlca = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedAlca, workflow=alcaMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.")

    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.")

    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.")
    return
def testPromptRecoWithSkims(self): """ _testT1PromptRecoWithSkim_ Create a T1 Prompt Reconstruction workflow with PromptSkims and verify it installs into WMBS correctly. """ testArguments = PromptRecoWorkloadFactory.getTestArguments() testArguments["CouchURL"] = os.environ["COUCHURL"] testArguments["EnableHarvesting"] = True factory = PromptRecoWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco") recoWorkflow.load() self.assertEqual(len(recoWorkflow.outputMap), len(testArguments["WriteTiers"]) + 1, "Error: Wrong number of WF outputs in the Reco WF.") goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in viewitems(goldenOutputMods): fset = goldenOutputMod + tier mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"] unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_ALCARECO": self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() 
self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim") alcaSkimWorkflow.load() self.assertEqual(len(alcaSkimWorkflow.outputMap), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: fset = goldenOutputMod + "ALCARECO" mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") dqmWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged") dqmWorkflow.load() logArchOutput = 
dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in viewitems(goldenOutputMods): fset = goldenOutputMod + tier mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, 
"/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap), 2, "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap)) mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock") topLevelFileset.loadData() recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow) recoSubscription.loadData() self.assertEqual(recoSubscription["type"], "Processing", "Error: Wrong subscription type.") 
self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased", "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"]) alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO") alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"]) mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM") mergedDQMFileset.loadData() dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for unmergedOutput, tier in viewitems(unmergedOutputs): fset = unmergedOutput + tier unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) unmergedOutputs = [] for alcaProd in testArguments["AlcaSkims"]: unmergedOutputs.append("ALCARECOStream%s" % alcaProd) for unmergedOutput in unmergedOutputs: unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput) unmergedAlcaSkim.loadData() alcaSkimMergeWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput) alcaSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"]) goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"} for goldenOutputMod, tier in viewitems(goldenOutputMods): fset = goldenOutputMod + tier unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset) unmergedFileset.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() 
cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive") recoLogCollect.loadData() recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect") recoLogCollectWorkflow.load() logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive") alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect") alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod) recoMergeLogCollect.loadData() recoMergeLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % ( goldenOutputMod, goldenOutputMod)) recoMergeLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset=recoMergeLogCollect, workflow=recoMergeLogCollectWorkflow) logCollectSubscription.loadData() 
self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: alcaSkimLogCollect = Fileset( name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % ( goldenOutputMod, goldenOutputMod)) alcaSkimLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") dqmHarvestLogCollect = Fileset( name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive") dqmHarvestLogCollect.loadData() dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect") dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
def testRepack(self):
    """
    _testRepack_

    Create a Repack workflow and verify it installs into WMBS
    correctly: top-level Repack workflow outputs, per-dataset merge
    workflows, cleanup and LogCollect subscriptions.
    """
    testArguments = RepackWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))

    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS and create its subscriptions.
    testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    repackWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Repack")
    repackWorkflow.load()
    # One output fileset per configured output plus the logArchive output.
    self.assertEqual(len(repackWorkflow.outputMap),
                     len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Repack WF.")

    goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW",
                        "write_PrimaryDataset2_RAW": "RAW"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # NOTE(review): the merged-name check is deliberately skipped for
        # write_PrimaryDataset1_RAW in the original test — kept as-is.
        if goldenOutputMod != "write_PrimaryDataset1_RAW":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Repack/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each RepackMerge workflow and its output filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 3,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level Repack subscription.
    topLevelFileset = Fileset(name="TestWorkload-Repack")
    topLevelFileset.loadData()
    repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
    repackSubscription.loadData()
    self.assertEqual(repackSubscription["type"], "Repack",
                     "Error: Wrong subscription type.")
    self.assertEqual(repackSubscription["split_algo"], "Repack",
                     "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])

    # Merge subscriptions feeding off the unmerged output filesets.
    unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW",
                       "write_PrimaryDataset2_RAW": "RAW"}
    for unmergedOutput, tier in viewitems(unmergedOutputs):
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the unmerged filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Top-level LogCollect subscription.
    repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
    repackLogCollect.loadData()
    repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                        task="/TestWorkload/Repack/LogCollect")
    repackLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=repackLogCollect,
                                 workflow=repackLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # Per-merge LogCollect subscriptions.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        repackMergeLogCollect = Fileset(
            name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
        repackMergeLogCollect.loadData()
        repackMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" %
                 (goldenOutputMod, goldenOutputMod))
        repackMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                              workflow=repackMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: the original asserted on the stale `logCollectSub` from the
        # top-level check above, so the per-merge subscriptions were never
        # actually verified.  Assert on the subscription loaded in this loop.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")
    return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This tests run on unmerged RECO output
    """
    # Inject the reco and skim configurations into the config cache fixtures.
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = getTestArguments()
    dataProcArguments['ProcessingString'] = 'ProcString'
    dataProcArguments['ConfigCacheID'] = recoConfig
    # One skim ("SomeSkim") reading the transient, unmerged RECOoutput module.
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "RECOoutput",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # RECOoutput is transient: not merged/saved itself, only consumed by the skim.
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]

    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # The skim's merged LFN base must carry the ProcessingString and version.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.\
                     SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # The skim task hangs directly off DataProcessing (no merge in between,
    # since its input RECOoutput is transient/unmerged).
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()
    # Two skim output modules plus logArchive.
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive of the skim task is never merged: both filesets point at
    # the unmerged-logArchive fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each SomeSkimMerge workflow and its filesets.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # For a merge task, merged and unmerged filesets are the same object.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscribes to the *unmerged* RECOoutput fileset (dropped RECO).
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the skim's unmerged outputs.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the same unmerged filesets.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions on the merge tasks' merged-logArchive filesets.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    return
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly: processing, merge, cleanup, LogCollect, skim and DQM
    harvesting tasks plus all of their filesets and subscriptions.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    # One skim ("SomeSkim") attached to the merged RECOoutput.
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # The skim's merged LFN base must carry the ProcessingString and version.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput. \
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing")
    procWorkflow.load()
    # RECOoutput + DQMoutput + logArchive.
    self.assertEqual(len(procWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = {"RECOoutput": "RECO", "DQMoutput": "DQM"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = procWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each DataProcessingMerge workflow and its filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level processing subscription.
    topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the unmerged RECO and DQM filesets.
    unmergedReco = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(name="/TestWorkload/DataProcessing/unmerged-DQMoutputDQM")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Cleanup subscriptions on the unmerged filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Top-level LogCollect subscription.
    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription of the RECO merge task.  (The original test
    # duplicated this whole check verbatim; the copy has been removed.)
    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/DataProcessingRECOoutputMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # Skim workflow hangs off the RECO merge task.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each SomeSkimMerge workflow and its filesets.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Skim subscribes to the *merged* RECO fileset.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-MergedRECO")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the skim's unmerged outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the skim's unmerged outputs.
    for goldenOutputMod, tier in viewitems(goldenOutputMods):
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions of the skim merge tasks.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                     workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting subscribes to the merged DQM fileset.
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-MergedDQM")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset=topLevelFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # LogCollect subscription of the harvesting task.
    dqmHarvestLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def _commonMonteCarloTest(self):
    """
    _commonMonteCarloTest_

    Retrieve the Monte Carlo workload from WMBS and verify all of its
    properties: the production workflow's output filesets, the per-output
    merge workflows, and the production/merge/cleanup/logcollect
    subscriptions together with their types and split algorithms.

    Assumes a "TestWorkload" production workload was previously injected
    into WMBS with outputs OutputA (RECO) and OutputB (USER).
    """
    # Output module -> data tier; drives every per-output check below.
    goldenOutputMods = {"OutputA": "RECO", "OutputB": "USER"}

    prodWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Production")
    prodWorkflow.load()
    # Two data outputs plus the logArchive output.
    self.assertEqual(len(prodWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = prodWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = prodWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Production/unmerged-%s" % (goldenOutputMod + tier),
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Verify each merge workflow's outputs.  (Removed leftover debug code:
    # a function-body "from pprint import pformat" and a print of the
    # outputMap did not belong in the test.)
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # For a merge task both filesets point at the merged output.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top level production subscription.
    topLevelFileset = Fileset(name="TestWorkload-Production-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=prodWorkflow)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions feed off the unmerged output filesets.
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedOutput = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset)
        unmergedOutput.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions also feed off the unmerged output filesets.
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmerged = Fileset(name="/TestWorkload/Production/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Production/ProductionCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect for the production task.
    procLogCollect = Fileset(name="/TestWorkload/Production/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Production/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # LogCollect for each merge task.
    for goldenOutputMod in goldenOutputMods:
        mergeLogCollect = Fileset(name="/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod)
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (
                                               goldenOutputMod, goldenOutputMod))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testPromptRecoWithSkims(self):
    """
    _testPromptRecoWithSkims_

    Create a T1 Prompt Reconstruction workflow with PromptSkims and
    verify it installs into WMBS correctly: the Reco, AlcaSkim and
    PromptSkim workflows, their output filesets, and all
    merge/cleanup/logcollect subscriptions with the expected types and
    split algorithms.
    """
    self.setupPromptSkimConfigObject()
    testArguments = getTestArguments()
    testArguments["PromptSkims"] = [self.promptSkim]
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "promptreco_t"
    testArguments["EnvPath"] = os.environ.get("EnvPath", None)
    testArguments["BinPath"] = os.environ.get("BinPath", None)

    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # --- Reco workflow outputs ---
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # ALCARECO is consumed by AlcaSkim instead of a plain merge task,
        # so it has no RecoMerge* merged fileset to check.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- AlcaSkim workflow outputs ---
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- PromptSkim workflow outputs ---
    promptSkimWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
    promptSkimWorkflow.load()
    # Five skim outputs plus logArchive.
    self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Reco merge workflows ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- AlcaSkim merge workflows ---
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- PromptSkim merge workflows ---
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Top level and processing subscriptions ---
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    mergedRecoFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
    mergedRecoFileset.loadData()
    promptSkimSubscription = Subscription(fileset=mergedRecoFileset, workflow=promptSkimWorkflow)
    promptSkimSubscription.loadData()
    self.assertEqual(promptSkimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(promptSkimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"])

    # --- Merge subscriptions ---
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                       "fakeSkimOut4", "fakeSkimOut5"]
    for unmergedOutput in unmergedOutputs:
        unmergedPromptSkim = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
        unmergedPromptSkim.loadData()
        promptSkimMergeWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput)
        promptSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedPromptSkim, workflow=promptSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        # BUG FIX: this loop previously built the fileset and task names from
        # the stale `unmergedOutput` left over from the merge-subscription
        # loop above, so it verified the same cleanup subscription five
        # times instead of one per skim output.
        unmergedFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"])

    # --- LogCollect subscriptions (task level) ---
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    promptSkimLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive")
    promptSkimLogCollect.loadData()
    promptSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect")
    promptSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=promptSkimLogCollect, workflow=promptSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions (per merge task) ---
    # BUG FIX: the three loops below previously asserted on the stale
    # `logCollectSub` from the task-level checks above instead of the
    # `logCollectSubscription` they had just loaded, so the per-merge
    # LogCollect subscriptions were never actually verified.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        promptSkimMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod)
        promptSkimMergeLogCollect.loadData()
        promptSkimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                     task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        promptSkimMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=promptSkimMergeLogCollect,
                                              workflow=promptSkimMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")
    return
def testPromptReco(self):
    """
    _testPromptReco_

    Create a Prompt Reconstruction workflow and verify it installs into WMBS
    correctly: the Reco/AlcaSkim/DQM-harvest workflows, their output filesets,
    and the merge, cleanup and log-collect subscriptions hanging off them.

    Fix: the merge-LogCollect verification loops previously asserted on the
    stale ``logCollectSub`` variable (loaded for the top-level LogCollect
    earlier) instead of the ``logCollectSubscription`` they had just created,
    so those subscriptions were never actually checked.
    """
    testArguments = getTestArguments()
    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS: top-level fileset plus subscriptions
    # for the whole task tree.
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                      testWMBSHelper.topLevelFileset)

    # --- Reco workflow: one output per write tier plus logArchive. ---
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()),
                     len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # write_ALCARECO feeds the AlcaSkim task, not a plain merge task, so
        # its merged fileset name does not follow the RecoMerge pattern.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- AlcaSkim workflow: one output per ALCARECO stream plus logArchive. ---
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()),
                     len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- DQM harvesting workflow: only a logArchive output. ---
    dqmWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged")
    dqmWorkflow.load()
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows for the Reco data tiers. ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # Merge tasks produce already-merged data, so both map entries point
        # at the same merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows for the ALCARECO streams. ---
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Processing subscription for the top-level Reco task. ---
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # --- AlcaSkim subscription feeds off the unmerged ALCARECO output. ---
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # --- DQM harvesting subscription runs over the merged DQM output. ---
    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions for the Reco data tiers. ---
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier,
                                         workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Merge subscriptions for the ALCARECO streams. ---
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                         workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions for the unmerged Reco outputs. ---
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # --- Cleanup subscriptions for the unmerged ALCARECO outputs. ---
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscription for the top-level Reco task. ---
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscription for the AlcaSkim task. ---
    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                 workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions for the data-tier merge tasks.
    # Bug fix: assert on logCollectSubscription (just loaded), not the stale
    # logCollectSub from the sections above.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(
            name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscriptions for the ALCARECO merge tasks (same fix). ---
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # --- LogCollect subscription for the DQM harvesting task. ---
    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                 workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def verifyDiscardRAW(self):
    """
    _verifyDiscardRAW_

    Verify that a workflow that discards the RAW was installed into WMBS
    correctly.

    Checks, for a two-output (RECODEBUG + DQM) StepOneProc task: the expected
    filesets exist, the processing workflow's output map routes each output
    module to the right merged/unmerged fileset, and the processing, cleanup,
    log-collect and merge subscriptions all exist with the right type.
    Assumes the workload was already installed into WMBS by the calling test.
    """
    # Load every fileset the installed workload is expected to have created.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()

    stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-DQMoutput")
    stepTwoUnmergedDQMFileset.loadData()
    stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RECODEBUGoutput")
    stepTwoUnmergedRECOFileset.loadData()
    stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-Merged")
    stepTwoMergedDQMFileset.loadData()
    stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-Merged")
    stepTwoMergedRECOFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput/merged-logArchive")
    stepTwoMergeDQMLogArchiveFileset.loadData()
    stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset.loadData()

    # Processing workflow: both output modules present and wired to the
    # correct merged/unmerged filesets; logArchive maps to itself (unmerged).
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepTwoWorkflow.load()
    self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id,
                     stepTwoUnmergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")

    # Processing subscription over the top-level (block) fileset.
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")

    # Each output module should have exactly one destination fileset pair.
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Cleanup workflow/subscription for the unmerged DQM output.
    stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedDQMoutput")
    stepTwoCleanupDQMWorkflow.load()
    self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow,
                                        fileset = stepTwoUnmergedDQMFileset)
    stepTwoCleanupDQMSub.loadData()
    self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # Cleanup workflow/subscription for the unmerged RECODEBUG output.
    stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRECODEBUGoutput")
    stepTwoCleanupRECOWorkflow.load()
    self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow,
                                         fileset = stepTwoUnmergedRECOFileset)
    stepTwoCleanupRECOSub.loadData()
    self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # LogCollect workflow/subscription over the unmerged logArchive fileset.
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # RECODEBUG merge workflow: Merged + logArchive outputs, both entries of
    # "Merged" point at the merged fileset (merge output is already merged).
    stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRECODEBUGoutput")
    stepTwoMergeRECOWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow,
                                       fileset = stepTwoUnmergedRECOFileset)
    stepTwoMergeRECOSub.loadData()
    self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # DQM merge workflow: same structure as the RECODEBUG merge above.
    stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeDQMoutput")
    stepTwoMergeDQMWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow,
                                      fileset = stepTwoUnmergedDQMFileset)
    stepTwoMergeDQMSub.loadData()
    self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def verifyKeepAOD(self):
    """
    _verifyKeepAOD_

    Verify that a workflow that only produces AOD in a single step was
    installed correctly into WMBS.

    Checks the single-output (aodOutputModule) StepOneProc task: the expected
    filesets exist, the output map routes the AOD module to the right
    merged/unmerged filesets, and the processing, cleanup, log-collect and
    merge subscriptions exist with the right type. Assumes the workload was
    already installed into WMBS by the calling test.
    """
    # Load every fileset the installed workload is expected to have created.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()
    stepTwoUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-aodOutputModule")
    stepTwoUnmergedAODFileset.loadData()
    stepTwoMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-Merged")
    stepTwoMergedAODFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeAODLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule/merged-logArchive")
    stepTwoMergeAODLogArchiveFileset.loadData()

    # Processing workflow: the AOD output module is present and wired to the
    # correct merged/unmerged filesets; logArchive maps to itself (unmerged).
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepTwoWorkflow.load()
    self.assertTrue("aodOutputModule" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id,
                     stepTwoUnmergedAODFileset.id,
                     "Error: AOD output fileset is wrong.")

    # Processing subscription over the top-level (block) fileset.
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = topLevelFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")

    # Each output module should have exactly one destination fileset pair.
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # Cleanup workflow/subscription for the unmerged AOD output.
    stepTwoCleanupAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedaodOutputModule")
    stepTwoCleanupAODWorkflow.load()
    self.assertEqual(len(stepTwoCleanupAODWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupAODSub = Subscription(workflow = stepTwoCleanupAODWorkflow,
                                        fileset = stepTwoUnmergedAODFileset)
    stepTwoCleanupAODSub.loadData()
    self.assertEqual(stepTwoCleanupAODSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")

    # LogCollect workflow/subscription over the unmerged logArchive fileset.
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow,
                                        fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # AOD merge workflow: Merged + logArchive outputs; both entries of
    # "Merged" point at the merged fileset (merge output is already merged).
    stepTwoMergeAODWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeaodOutputModule")
    stepTwoMergeAODWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeAODWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeAODWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id,
                     stepTwoMergeAODLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["logArchive"][0]["output_fileset"].id,
                     stepTwoMergeAODLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeAODWorkflow.outputMap["Merged"][0]["output_fileset"].id,
                     stepTwoMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    stepTwoMergeAODSub = Subscription(workflow = stepTwoMergeAODWorkflow,
                                      fileset = stepTwoUnmergedAODFileset)
    stepTwoMergeAODSub.loadData()
    self.assertEqual(stepTwoMergeAODSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeAODWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeAODWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testDependentReDigi(self):
    """
    _testDependentReDigi_

    Verify that a dependent ReDigi workflow that keeps and stages out RAW
    data is created and installed into WMBS correctly.  Each step's
    processing, cleanup, log-collect and merge workflows are checked, along
    with the filesets and subscriptions that chain the steps together.
    """
    # Build a three-step ReDigi workload from injected config cache documents.
    defaultArguments = getTestArguments()
    defaultArguments["CouchURL"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "redigi_t"
    configs = self.injectReDigiConfigs()
    defaultArguments["StepOneConfigCacheID"] = configs[0]
    defaultArguments["StepTwoConfigCacheID"] = configs[1]
    defaultArguments["StepThreeConfigCacheID"] = configs[2]
    testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

    # Install the workload into WMBS starting from a single input block.
    testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock",
                                cachepath = self.testInit.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Load every fileset the installation should have created; loadData()
    # raises if the fileset does not exist, so these double as existence checks.
    topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
    topLevelFileset.loadData()
    stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
    stepOneUnmergedRAWFileset.loadData()
    stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
    stepOneMergedRAWFileset.loadData()
    stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
    stepOneLogArchiveFileset.loadData()
    stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
    stepOneMergeLogArchiveFileset.loadData()
    stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
    stepTwoUnmergedDQMFileset.loadData()
    stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
    stepTwoUnmergedRECOFileset.loadData()
    stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
    stepTwoMergedDQMFileset.loadData()
    stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
    stepTwoMergedRECOFileset.loadData()
    stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
    stepTwoLogArchiveFileset.loadData()
    stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
    stepTwoMergeDQMLogArchiveFileset.loadData()
    stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
    stepTwoMergeRECOLogArchiveFileset.loadData()
    stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
    stepThreeUnmergedAODFileset.loadData()
    stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
    stepThreeMergedAODFileset.loadData()
    stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
    stepThreeLogArchiveFileset.loadData()
    stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
    stepThreeMergeLogArchiveFileset.loadData()

    # --- Step one processing workflow ---
    stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc")
    stepOneWorkflow.load()
    self.assertEqual(stepOneWorkflow.wfType, 'redigi')
    self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                    "Error: Step one missing output module.")
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    # RAWDEBUG output feeds the step one merge task.
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG output fileset is wrong.")
    self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id,
                     "Error: RAWDEBUG output fileset is wrong.")
    for outputMod in stepOneWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
    stepOneSub.loadData()
    self.assertEqual(stepOneSub["type"], "Processing",
                     "Error: Step one sub has wrong type.")

    # --- Step one cleanup / log collect ---
    stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
    stepOneCleanupWorkflow.load()
    self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset)
    stepOneCleanupSub.loadData()
    self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                     "Error: Step one sub has wrong type.")
    stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/LogCollect")
    stepOneLogCollectWorkflow.load()
    self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset)
    stepOneLogCollectSub.loadData()
    self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                     "Error: Step one sub has wrong type.")

    # --- Step one RAWDEBUG merge workflow ---
    stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                    task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
    stepOneMergeWorkflow.load()
    self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                    "Error: Step one merge missing output module.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    # Merge output is already merged, so both entries point at the merged fileset.
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG merge output fileset is wrong.")
    self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id,
                     "Error: RAWDEBUG merge output fileset is wrong.")
    for outputMod in stepOneMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset)
    stepOneMergeSub.loadData()
    self.assertEqual(stepOneMergeSub["type"], "Merge",
                     "Error: Step one sub has wrong type.")

    # --- Step two processing workflow (subscribed to the merged RAWDEBUG) ---
    stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
    stepTwoWorkflow.load()
    self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                    "Error: Step two missing output module.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                     "Error: DQM output fileset is wrong.")
    stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
    stepTwoSub.loadData()
    self.assertEqual(stepTwoSub["type"], "Processing",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # --- Step two cleanup / log collect ---
    stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
    stepTwoCleanupDQMWorkflow.load()
    self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
    stepTwoCleanupDQMSub.loadData()
    self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")
    stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
    stepTwoCleanupRECOWorkflow.load()
    self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup shouldn't have any output.")
    stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
    stepTwoCleanupRECOSub.loadData()
    self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                     "Error: Step two sub has wrong type.")
    stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                         task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
    stepTwoLogCollectWorkflow.load()
    self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect shouldn't have any output.")
    stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
    stepTwoLogCollectSub.loadData()
    self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                     "Error: Step two sub has wrong type.")

    # --- Step two RECODEBUG merge workflow ---
    stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
    stepTwoMergeRECOWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                     "Error: RECODEBUG merge output fileset is wrong.")
    stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
    stepTwoMergeRECOSub.loadData()
    self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # --- Step two DQM merge workflow ---
    stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                       task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
    stepTwoMergeDQMWorkflow.load()
    self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                    "Error: Step two merge missing output module.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                     "Error: DQM merge output fileset is wrong.")
    stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
    stepTwoMergeDQMSub.loadData()
    self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                     "Error: Step two sub has wrong type.")
    for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
        self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # --- Step three processing workflow (subscribed to the merged RECODEBUG) ---
    stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                 task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
    stepThreeWorkflow.load()
    self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                    "Error: Step three missing output module.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id,
                     "Error: RECODEBUG output fileset is wrong.")
    stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
    stepThreeSub.loadData()
    self.assertEqual(stepThreeSub["type"], "Processing",
                     "Error: Step three sub has wrong type.")
    for outputMod in stepThreeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")

    # --- Step three cleanup / log collect ---
    stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
    stepThreeCleanupWorkflow.load()
    self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                     "Error: Cleanup should have no output.")
    stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset)
    stepThreeCleanupSub.loadData()
    self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                     "Error: Step three sub has wrong type.")
    stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
    stepThreeLogCollectWorkflow.load()
    self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                     "Error: LogCollect should have no output.")
    stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset)
    stepThreeLogCollectSub.loadData()
    self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                     "Error: Step three sub has wrong type.")

    # --- Step three AOD merge workflow ---
    stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                      task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
    stepThreeMergeWorkflow.load()
    self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                    "Error: Step three merge missing output module.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                     "Error: logArchive fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id,
                     "Error: AOD merge output fileset is wrong.")
    stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset)
    stepThreeMergeSub.loadData()
    self.assertEqual(stepThreeMergeSub["type"], "Merge",
                     "Error: Step three sub has wrong type.")
    for outputMod in stepThreeMergeWorkflow.outputMap.keys():
        self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                        "Error: more than one destination for output mod.")
    return
def testAnalysis(self):
    """
    _testAnalysis_

    Create an Analysis workload and verify it installs into WMBS correctly.
    Analysis has no merge task, so every "merged" output fileset entry is
    expected to point at the corresponding unmerged fileset.
    """
    defaultArguments = AnalysisWorkloadFactory.getTestArguments()
    defaultArguments["CouchUrl"] = os.environ["COUCHURL"]
    defaultArguments["CouchDBName"] = "analysis_t"
    defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    defaultArguments["ProcessingVersion"] = 1

    analysisProcessingFactory = AnalysisWorkloadFactory()
    testWorkload = analysisProcessingFactory.factoryWorkloadConstruction(
        "TestWorkload", defaultArguments)

    # Install the workload into WMBS from a single input block.
    testWMBSHelper = WMBSHelper(testWorkload, "Analysis", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Analysis")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # logArchive output: with no merge task both map entries are the
    # unmerged fileset.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Analysis/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Analysis/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # The "output" module likewise maps both entries to the unmerged fileset.
    output = procWorkflow.outputMap["output"][0]["output_fileset"]
    mergedOutput = procWorkflow.outputMap["output"][0]["merged_output_fileset"]
    output.loadData()
    mergedOutput.loadData()
    self.assertEqual(output.name,
                     "/TestWorkload/Analysis/unmerged-output",
                     "Error: Unmerged output fileset is wrong: " + output.name)
    # Fixed failure message: this assertion checks the *merged* output
    # fileset entry, not the unmerged one.
    self.assertEqual(mergedOutput.name,
                     "/TestWorkload/Analysis/unmerged-output",
                     "Error: Merged output fileset is wrong: " + mergedOutput.name)

    # Top-level processing subscription.
    topLevelFileset = Fileset(name="TestWorkload-Analysis-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Analysis",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")

    # LogCollect subscription over the unmerged log archive.
    procLogCollect = Fileset(name="/TestWorkload/Analysis/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/Analysis/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect,
                                 workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
def databaseWork(self):
    """
    _databaseWork_

    Query the database for all watched filesets and managed workflows; for
    every workflow whose fileset_match pattern matches a fileset name,
    create the corresponding WMBS subscription.

    Two passes are made:
      1. unsubscribed managed workflows against all filesets;
      2. managed workflows against unsubscribed filesets (subscriptions
         created here are also loaded back).
    """
    # Pass one: unsubscribed managed workflows vs. all known filesets.
    availableWorkflows = self.getUnsubscribedWorkflows.execute()
    logging.debug("Found %s unsubscribed managed workflows",
                  len(availableWorkflows))
    availableFilesets = self.getAllFilesets.execute()
    logging.debug("Found %s filesets", len(availableFilesets))

    for managedWorkflow in availableWorkflows:
        # Workflow object cache: load at most once per managed workflow,
        # however many filesets it matches.
        wfObj = None
        for fileset in availableFilesets:
            if not re.match(managedWorkflow['fileset_match'], fileset['name']):
                continue
            logging.debug("Creating subscription for %s to workflow id %s",
                          fileset['name'], managedWorkflow['workflow'])
            if wfObj is None:
                wfObj = Workflow(id=managedWorkflow['workflow'])
                wfObj.load()
            fsObj = Fileset(id=fileset['id'])
            fsObj.load()
            self._createSubscription(wfObj, fsObj, managedWorkflow)

    # Pass two: managed workflows vs. unsubscribed filesets.
    managedWorkflows = self.getManagedWorkflows.execute()
    logging.debug("Found %s managed workflows", len(managedWorkflows))
    unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
    logging.debug("Found %s unsubscribed filesets", len(unsubscribedFilesets))

    for unsubscribedFileset in unsubscribedFilesets:
        # Fileset object cache: load at most once per fileset, however
        # many workflows it matches.
        fsObj = None
        for managedWork in managedWorkflows:
            logging.debug("The workflow %s", managedWork['workflow'])
            if not re.match(managedWork['fileset_match'],
                            unsubscribedFileset['name']):
                continue
            logging.debug("Creating subscription for %s to workflow id %s",
                          unsubscribedFileset['name'], managedWork['workflow'])
            if fsObj is None:
                fsObj = Fileset(id=unsubscribedFileset['id'])
                fsObj.load()
            wfObj = Workflow(id=managedWork['workflow'])
            wfObj.load()
            newSub = self._createSubscription(wfObj, fsObj, managedWork)
            # This pass also loads the freshly created subscription back.
            newSub.load()

def _createSubscription(self, wfObj, fsObj, managedRow):
    """
    Create a WMBS subscription tying *fsObj* to *wfObj*, using the split
    algorithm and type recorded in the managed-workflow row, and return it.
    """
    newSub = Subscription(fileset=fsObj,
                          workflow=wfObj,
                          split_algo=managedRow['split_algo'],
                          type=managedRow['type'])
    newSub.create()
    return newSub
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workflow and verify it installs into WMBS
    correctly: output datasets, the production workflow and its output map,
    and the merge, cleanup and log-collect subscriptions for each output
    module.
    """
    arguments = MonteCarloFromGENWorkloadFactory.getTestArguments()
    arguments["ConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"
    arguments["PrimaryDataset"] = "WaitThisIsNotMinimumBias"

    factory = MonteCarloFromGENWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", arguments)

    # The custom primary dataset must propagate into both output datasets.
    outputDatasets = testWorkload.listOutputDatasets()
    self.assertEqual(len(outputDatasets), 2)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterRECO-FAKE-v1/RECO" in outputDatasets)
    self.assertTrue("/WaitThisIsNotMinimumBias/FAKE-FilterALCARECO-FAKE-v1/ALCARECO" in outputDatasets)

    # Deterministic pileup should be off by default.
    productionTask = testWorkload.getTaskByPath('/TestWorkload/MonteCarloFromGEN')
    splitting = productionTask.jobSplittingParameters()
    self.assertFalse(splitting["deterministicPileup"])

    # Install the workload into WMBS from a single input block.
    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock",
                                cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # --- Production workflow and its output map ---
    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'production')

    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # --- Merge workflows for each golden output module ---
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # Merge output is already merged: both entries are the merged fileset.
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # --- Top-level production subscription ---
    topLevelFileset = Fileset(name="TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # --- Merge subscriptions over the unmerged outputs ---
    unmergedReco = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])
    unmergedAlca = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedAlca, workflow=alcaMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

    # --- Cleanup subscriptions over the unmerged outputs ---
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # --- Log collect subscriptions: production task plus one per merge task ---
    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    procLogCollect = Fileset(name="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def _stepTwoTaskTest(self):
    """
    _stepTwoTaskTest_

    Validate the WMBS workflows, filesets and subscriptions created for
    the StepTwo task and its merge/cleanup/logcollect children.
    """
    taskPath = "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo"
    outputModules = ["OutputC", "OutputD"]

    # The StepTwo workflow itself: two data output modules plus logArchive.
    stepTwoWorkflow = Workflow(name="TestWorkload", task=taskPath)
    stepTwoWorkflow.load()
    self.assertEqual(len(stepTwoWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    for modName in outputModules:
        filesets = stepTwoWorkflow.outputMap[modName][0]
        mergedFileset = filesets["merged_output_fileset"]
        unmergedFileset = filesets["output_fileset"]
        mergedFileset.loadData()
        unmergedFileset.loadData()
        self.assertEqual(mergedFileset.name,
                         "%s/StepTwoMerge%s/merged-Merged" % (taskPath, modName),
                         "Error: Merged output fileset is wrong: %s" % mergedFileset.name)
        self.assertEqual(unmergedFileset.name,
                         "%s/unmerged-%s" % (taskPath, modName),
                         "Error: Unmerged output fileset is wrong: %s" % unmergedFileset.name)

    # The logArchive output is never merged, so both map entries point at
    # the unmerged fileset.
    logArchMaps = stepTwoWorkflow.outputMap["logArchive"][0]
    for logFileset in (logArchMaps["merged_output_fileset"], logArchMaps["output_fileset"]):
        logFileset.loadData()
        self.assertEqual(logFileset.name, "%s/unmerged-logArchive" % taskPath,
                         "Error: LogArchive output fileset is wrong: %s" % logFileset.name)

    # Each merge child workflow exposes a Merged output plus logArchive.
    for modName in outputModules:
        mergePath = "%s/StepTwoMerge%s" % (taskPath, modName)
        mergeWorkflow = Workflow(name="TestWorkload", task=mergePath)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name, "%s/merged-Merged" % mergePath,
                         "Error: Merged output fileset is wrong: %s" % mergedMergeOutput.name)
        self.assertEqual(unmergedMergeOutput.name, "%s/merged-Merged" % mergePath,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedMergeOutput.name)
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "%s/merged-logArchive" % mergePath,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name, "%s/merged-logArchive" % mergePath,
                         "Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name)

    # Merge subscriptions feed from the unmerged output filesets.
    for modName in outputModules:
        unmerged = Fileset(name="%s/unmerged-%s" % (taskPath, modName))
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="%s/StepTwoMerge%s" % (taskPath, modName))
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions also watch the unmerged output filesets.
    for modName in outputModules:
        unmerged = Fileset(name="%s/unmerged-%s" % (taskPath, modName))
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="%s/StepTwoCleanupUnmerged%s" % (taskPath, modName))
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions run over each merge task's logArchive fileset.
    for modName in outputModules:
        mergeLogCollect = Fileset(name="%s/StepTwoMerge%s/merged-logArchive" % (taskPath, modName))
        mergeLogCollect.loadData()
        mergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                           task="%s/StepTwoMerge%s/StepTwo%sMergeLogCollect" % (taskPath, modName, modName))
        mergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def _checkTask(self, task, taskConf, centralConf):
    """
    _checkTask_

    Verify the correctness of the task

    Checks the input step/task wiring, pileup configuration, output
    filesets, the child merge workflows and the WMBS subscription for the
    given task against its configuration (taskConf) and the top level
    request configuration (centralConf).
    """
    # Child tasks must point their input step at the parent's cmsRun1 step.
    if taskConf.get("InputTask") is not None:
        inpTaskPath = task.getPathName()
        inpTaskPath = inpTaskPath.replace(task.name(), "")
        inpTaskPath += "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inpTaskPath,
                         "Input step is wrong in the spec")
        self.assertTrue(taskConf["InputTask"] in inpTaskPath,
                        "Input task is not in the path name for child task")

    # Pileup datasets are stored as single-element lists on the step.
    if "MCPileup" in taskConf or "DataPileup" in taskConf:
        mcDataset = taskConf.get('MCPileup', None)
        dataDataset = taskConf.get('DataPileup', None)
        if mcDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset, [mcDataset])
        if dataDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset, [dataDataset])

    workflow = Workflow(name=self.workload.name(), task=task.getPathName())
    workflow.load()

    # Ignored output modules are dropped from the workflow output map.
    outputMods = outputModuleList(task)
    ignoredOutputMods = task.getIgnoredOutputModulesForTask()
    outputMods = set(outputMods) - set(ignoredOutputMods)
    self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                     "Error: Wrong number of WF outputs")

    for outputModule in outputMods:
        filesets = workflow.outputMap[outputModule][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']
        merged.loadData()
        unmerged.loadData()
        mergedset = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-Merged"
        # logArchive, dropped, transient and ignored outputs are never
        # merged, so their "merged" fileset is really the unmerged one.
        if outputModule == "logArchive" or not taskConf.get("KeepOutput", True) \
                or outputModule in taskConf.get("TransientOutputModules", []) \
                or outputModule in centralConf.get("IgnoredOutputModules", []):
            mergedset = task.getPathName() + "/unmerged-" + outputModule
        unmergedset = task.getPathName() + "/unmerged-" + outputModule
        self.assertEqual(mergedset, merged.name, "Merged fileset name is wrong")
        self.assertEqual(unmergedset, unmerged.name, "Unmerged fileset name is wrong")

        # For kept, non-transient, non-ignored data outputs there must be a
        # merge task whose filesets line up with the parent's.
        if outputModule != "logArchive" and taskConf.get("KeepOutput", True) \
                and outputModule not in taskConf.get("TransientOutputModules", []) \
                and outputModule not in centralConf.get("IgnoredOutputModules", []):
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" + outputModule
            mergeWorkflow = Workflow(name=self.workload.name(), task=mergeTask)
            mergeWorkflow.load()
            self.assertTrue("Merged" in mergeWorkflow.outputMap,
                            "Merge workflow does not contain a Merged output key")
            mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
            mergedFileset = mergedOutputMod['merged_output_fileset']
            unmergedFileset = mergedOutputMod['output_fileset']
            mergedFileset.loadData()
            unmergedFileset.loadData()
            self.assertEqual(mergedFileset.name, mergedset,
                             "Merged fileset name in merge task is wrong")
            self.assertEqual(unmergedFileset.name, mergedset,
                             "Unmerged fileset name in merge task is wrong")
            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-logArchive"
            self.assertEqual(mrgLogArch.name, archName,
                             "LogArchive merged fileset name is wrong in merge task")
            self.assertEqual(umrgLogArch.name, archName,
                             "LogArchive unmerged fileset name is wrong in merge task")

        # Check the dataset information on the output module itself.
        if outputModule != "logArchive":
            taskOutputMods = task.getOutputModulesForStep(stepName="cmsRun1")
            currentModule = getattr(taskOutputMods, outputModule)
            if taskConf.get("PrimaryDataset") is not None:
                self.assertEqual(currentModule.primaryDataset,
                                 taskConf["PrimaryDataset"], "Wrong primary dataset")
            # Bugfix: this list used to be a single comma-joined string
            # (["AcquisitionEra, ProcessingString, ProcessingVersion"]),
            # which made "part in taskConf" always false and silently
            # disabled the processed dataset checks below.
            processedDatasetParts = ["AcquisitionEra", "ProcessingString", "ProcessingVersion"]
            allParts = True
            for part in processedDatasetParts:
                if part in taskConf:
                    # Bugfix: check the configured *value* (not the key name)
                    # against the processed dataset string.
                    self.assertTrue(str(taskConf[part]) in currentModule.processedDataset,
                                    "Wrong processed dataset for module")
                else:
                    allParts = False
            if allParts:
                # Bugfix: the original assertEqual compared the expected
                # string against the failure message and never looked at
                # currentModule.processedDataset at all.
                self.assertEqual("%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                                taskConf["ProcessingString"],
                                                taskConf["ProcessingVersion"]),
                                 currentModule.processedDataset,
                                 "Wrong processed dataset for module")

    # Test subscriptions
    if taskConf.get("InputTask") is None:
        # Top level task: fed directly by the injected block fileset.
        inputFileset = "%s-%s-SomeBlock" % (self.workload.name(), task.name())
    elif "Merge" in task.getPathName().split("/")[-2]:
        # Child of a merge task: reads the merged fileset.
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "merged-Merged"
    else:
        # Child of a processing task: reads the unmerged fileset.
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"]

    taskFileset = Fileset(name=inputFileset)
    taskFileset.loadData()
    taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
    taskSubscription.loadData()

    if taskConf.get("InputTask") is None and taskConf.get("InputDataset") is None:
        # Production type
        self.assertEqual(taskSubscription["type"], "Production",
                         "Error: Wrong subscription type for processing task")
        self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgo"],
                         "Error: Wrong split algo for generation task")
    else:
        # Processing type
        self.assertEqual(taskSubscription["type"], "Processing",
                         "Wrong subscription type for task")
        if taskSubscription["split_algo"] != "WMBSMergeBySize":
            self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgo'],
                             "Splitting algo mismatch")
        else:
            # WMBSMergeBySize is only legal on unmerged input.
            self.assertEqual(taskFileset.name,
                             inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"],
                             "Subscription uses WMBSMergeBySize on a merge fileset")
    return
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  Checks the workflows, filesets and subscriptions created
    for the processing, merge, cleanup, log collect, skim and DQM
    harvesting tasks.
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = recoConfig

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # Sanity check: the merged LFN base of the skim output carries the
    # filter name and processing string.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # The processing workflow: RECOoutput, DQMoutput and logArchive.
    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing")
    procWorkflow.load()
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["RECOoutput", "DQMoutput"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Each merge workflow exposes Merged plus logArchive.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top level processing subscription.
    topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged outputs.
    unmergedReco = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedReco, workflow=recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    unmergedDqm = Fileset(name="/TestWorkload/DataProcessing/unmerged-DQMoutput")
    unmergedDqm.loadData()
    dqmMergeWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput")
    dqmMergeWorkflow.load()
    mergeSubscription = Subscription(fileset=unmergedDqm, workflow=dqmMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo.")

    # Cleanup subscriptions over the unmerged outputs.
    for procOutput in ["RECOoutput", "DQMoutput"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect over the processing task's own logArchive.
    procLogCollect = Fileset(name="/TestWorkload/DataProcessing/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name="TestWorkload",
                                      task="/TestWorkload/DataProcessing/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    # One LogCollect subscription per merge task.  Bugfix: the original
    # test checked the RECOoutput merge log collect twice verbatim and
    # never covered DQMoutput; the loop now covers both.
    for mergeOutputMod in goldenOutputMods:
        procLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % mergeOutputMod)
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/DataProcessing/DataProcessingMerge%s/DataProcessing%sMergeLogCollect" % (mergeOutputMod, mergeOutputMod))
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect, workflow=procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # The skim workflow: SkimA, SkimB and logArchive.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Skim merge workflows.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Skim subscription runs over the merged RECO output.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()
    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Skim merge subscriptions.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Skim cleanup subscriptions.
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # Skim merge LogCollect subscriptions.
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting runs over the merged DQM output.
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()
    dqmSubscription = Subscription(fileset=topLevelFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # LogCollect over the harvesting task's logArchive.
    dqmHarvestLogCollect = Fileset(name="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def setupPromptRecoWorkflow(self):
    """
    _setupPromptRecoWorkflow_

    Populate WMBS with a real PromptReco workflow,
    every subscription must be unfinished at first
    """
    # Populate disk and WMBS
    workflowName = 'PromptReco_Run195360_Cosmics'
    recoArgs = PromptRecoWorkloadFactory.getTestArguments()
    recoArgs["EnableHarvesting"] = True
    recoArgs["CouchURL"] = os.environ["COUCHURL"]
    workload = PromptRecoWorkloadFactory().factoryWorkloadConstruction(workflowName, recoArgs)

    wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

    # Subscriptions are bucketed by the request status they should drive to.
    self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']
    self.stateMap = {state: [] for state in self.orderedStates}

    # Populate WMStats
    self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
    self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

    topLevelTask = '/%s/Reco' % workflowName
    alcaSkimTask = '%s/AlcaSkim' % topLevelTask
    mergeTaskTemplates = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                          '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                          '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                          '%s/RecoMergewrite_AOD',
                          '%s/RecoMergewrite_DQM',
                          '%s/RecoMergewrite_RECO']
    harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

    # Finishing the top level subscription moves the request to AlcaSkim.
    self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

    alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
    alcaSkimWorkflow.load()
    alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
    alcarecoFileset.load()
    alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
    alcaSkimSub.load()
    self.stateMap['Merge'].append(alcaSkimSub)

    for template in mergeTaskTemplates:
        mergeTaskPath = template % topLevelTask
        mergeWorkflow = Workflow(name=workflowName, task=mergeTaskPath)
        mergeWorkflow.load()
        taskLeaf = mergeTaskPath.split('/')[-1]
        if 'AlcaSkim' in mergeTaskPath:
            # Strip the 'AlcaSkimMerge' prefix (13 chars) to recover the
            # ALCARECO stream name.
            stream = taskLeaf[13:]
            unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
        else:
            dataTier = taskLeaf.split('_')[-1]
            unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
        unmergedFileset.load()
        mergeSub = Subscription(unmergedFileset, mergeWorkflow)
        mergeSub.load()
        self.stateMap['Harvesting'].append(mergeSub)

    harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
    harvestingWorkflow.load()
    harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
    harvestingFileset.load()
    harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
    harvestingSub.load()
    self.stateMap['Processing Done'].append(harvestingSub)
    return
def pollSubscriptions(self):
    """
    Poller for looking in all active subscriptions for jobs that need to be made.

    For each unfinished subscription: load it and its workflow/spec, build the
    appropriate job splitter, then iterate the splitter generator, creating jobs
    in batches.  Each batch is wrapped in a single database transaction so a
    failure rolls back the whole batch.  Subscriptions that cannot be loaded or
    have no sandbox/spec are skipped (with an alert), not failed.
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    #First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id=subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            continue

        workflow = Workflow(id=wmbsSubscription["workflow"].id)
        workflow.load()
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow=workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            self.sendAlert(1, msg=msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n" % (subscriptionID, workflow.id))

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception, ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next interation!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            self.sendAlert(6, msg=msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" % (subscriptionID, self.limit))

        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s" % splitParams)

        # My hope is that the job factory is smart enough only to split un-split jobs
        splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
        wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                         subscription=wmbsSubscription,
                                         generators=seederList,
                                         limit=self.limit)

        # Turn on the jobFactory
        wmbsJobFactory.open()

        # Create a function to hold it
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)

        # Now we get to find out how many jobs there are.
        # jobNumber seeds the counter used to build unique job names/LFNs.
        jobNumber = self.countJobs.execute(workflow=workflow.id,
                                           conn=myThread.transaction.conn,
                                           transaction=True)
        jobNumber += splitParams.get('initial_lfn_counter', 0)
        logging.debug("Have %i jobs for workflow %s already in database." % (jobNumber, workflow.name))

        continueSubscription = True
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            # Each pass through this loop is one transaction: begin here,
            # commit at the bottom (or on the break paths below).
            myThread.transaction.begin()
            try:
                wmbsJobGroups = jobSplittingFunction.next()
                logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i" % (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # If we have no jobGroups, we're done
            if len(wmbsJobGroups) == 0:
                logging.info("Found end in iteration over subscription %i" % (subscriptionID))
                continueSubscription = False
                myThread.transaction.commit()
                break

            # Assemble a dict of all the info shared by every jobGroup in
            # this batch; per-group fields are layered on top below.
            processDict = {'workflow': workflow,
                           'wmWorkload': wmWorkload,
                           'wmTaskName': wmTask.getPathName(),
                           'jobNumber': jobNumber,
                           'sandbox': wmTask.data.input.sandbox,
                           'wmTaskPrio': wmTask.getTaskPriority(),
                           'owner': wmWorkload.getOwner().get('name', None),
                           'ownerDN': wmWorkload.getOwner().get('dn', None),
                           'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                           'ownerRole': wmWorkload.getOwner().get('vorole', '')}

            tempSubscription = Subscription(id=wmbsSubscription['id'])

            # Accumulates {jobid, cacheDir} pairs for one bulk DB update.
            nameDictList = []
            for wmbsJobGroup in wmbsJobGroups:
                # For each jobGroup, put a dictionary
                # together and run it with creatorProcess
                jobsInGroup = len(wmbsJobGroup.jobs)
                wmbsJobGroup.subscription = tempSubscription
                tempDict = {}
                tempDict.update(processDict)
                tempDict['jobGroup'] = wmbsJobGroup
                tempDict['swVersion'] = wmTask.getSwVersion()
                tempDict['scramArch'] = wmTask.getScramArch()
                tempDict['jobNumber'] = jobNumber
                tempDict['agentNumber'] = self.agentNumber

                jobGroup = creatorProcess(work=tempDict,
                                          jobCacheDir=self.jobCacheDir)
                # Advance the counter so the next group's jobs get unique numbers
                jobNumber += jobsInGroup

                # Set jobCache for group
                for job in jobGroup.jobs:
                    nameDictList.append({'jobid': job['id'],
                                         'cacheDir': job['cache_dir']})
                    job["user"] = wmWorkload.getOwner()["name"]
                    job["group"] = wmWorkload.getOwner()["group"]

            # Set the caches in the database
            try:
                if len(nameDictList) > 0:
                    self.setBulkCache.execute(jobDictList=nameDictList,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            except WMException:
                # Known WMCore exceptions propagate as-is
                raise
            except Exception, ex:
                msg = "Unknown exception while setting the bulk cache:\n"
                msg += str(ex)
                logging.error(msg)
                self.sendAlert(6, msg=msg)
                logging.debug("Error while setting bulkCache with following values: %s\n" % nameDictList)
                raise JobCreatorException(msg)

            # Advance the jobGroup in changeState
            for wmbsJobGroup in wmbsJobGroups:
                self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

            # Now end the transaction so that everything is wrapped
            # in a single rollback
            myThread.transaction.commit()
def testReReco(self):
    """
    _testReReco_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly. The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. We'll test the skims and DQMHarvest here.
    """
    # Build a ReReco workload with one skim (two output modules: SkimA/SkimB)
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = getTestArguments()
    dataProcArguments['ProcessingString'] = 'ProcString'
    dataProcArguments['ConfigCacheID'] = recoConfig
    dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                         "SkimInput": "RECOoutput",
                                         "SkimSplitAlgo": "FileBased",
                                         "SkimSplitArgs": {"files_per_job": 1,
                                                           "include_parents": True},
                                         "ConfigCacheID": skimConfig,
                                         "Scenario": None}]
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

    # The skim's merged LFN base must carry the skim filter + ProcessingString
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                     tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                     Merged.mergedLFNBase,
                     '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

    # Install the workload into WMBS
    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Skim workflow: 2 skim outputs + logArchive
    skimWorkflow = Workflow(name = "TestWorkload",
                            task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
    skimWorkflow.load()

    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["SkimA", "SkimB"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    # logArchive of the skim task is never merged: both map entries point
    # at the unmerged fileset.
    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows for each skim output: both map entries point at the
    # merged fileset (merge output is already merged).
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscribes to the merged RECO output of DataProcessing
    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions on each unmerged skim output
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions on the same unmerged skim outputs
    for skimOutput in ["A", "B"]:
        unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions on the skim merge tasks' logArchive filesets
    for skimOutput in ["A", "B"]:
        skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    # DQM harvesting runs over the merged DQM output
    dqmWorkflow = Workflow(name = "TestWorkload",
                           task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()

    topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
    topLevelFileset.loadData()

    dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
    dqmSubscription.loadData()

    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # LogCollect for the harvesting task's logs
    dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                            task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputEndOfRunDQMHarvestMerged/DataProcessingMergeDQMoutputMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()

    logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()

    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
def testPromptRecoWithSkims(self):
    """
    _testT1PromptRecoWithSkim_

    Create a T1 Prompt Reconstruction workflow with PromptSkims
    and verify it installs into WMBS correctly.

    Checks, per task (Reco, AlcaSkim, merges, harvesting, cleanup,
    log-collect): the workflow output maps, the merged/unmerged fileset
    names, and each subscription's type and split algorithm.
    """
    testArguments = PromptRecoWorkloadFactory.getTestArguments()
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["EnableHarvesting"] = True
    factory = PromptRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    # Install the workload into WMBS
    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    # Top level Reco workflow: one output per write tier + logArchive
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = {"write_RECO": "RECO", "write_ALCARECO": "ALCARECO",
                        "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = recoWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()

        # ALCARECO is consumed by the AlcaSkim task instead of being merged,
        # so it has no RecoMerge fileset to check.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per ALCARECO stream + logArchive
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        fset = goldenOutputMod + "ALCARECO"
        mergedOutput = alcaSkimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # DQM harvesting workflow: only a logArchive output
    dqmWorkflow = Workflow(name="TestWorkload",
                           task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged")
    dqmWorkflow.load()
    logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Data-tier merge workflows (merge output maps both entries to merged)
    goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # ALCARECO-stream merge workflows
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["MergedALCARECO"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-MergedALCARECO" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top level (Reco) subscription on the input block fileset
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventAwareLumiBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim subscription on the unmerged ALCARECO output
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECOALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # Harvesting subscription on the merged DQM output
    mergedDQMFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-MergedDQM")
    mergedDQMFileset.loadData()
    dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow)
    dqmSubscription.loadData()
    self.assertEqual(dqmSubscription["type"], "Harvesting",
                     "Error: Wrong subscription type.")
    self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                     "Error: Wrong split algo.")

    # Merge subscriptions on the unmerged data-tier outputs
    unmergedOutputs = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions on the unmerged ALCARECO-stream outputs
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions on the unmerged data-tier outputs
    goldenOutputMods = {"write_RECO": "RECO", "write_AOD": "AOD", "write_DQM": "DQM"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Cleanup subscriptions on the unmerged ALCARECO-stream outputs
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%sALCARECO" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # LogCollect for the Reco task
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect for the AlcaSkim task
    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # LogCollect for each data-tier merge task.
    # Fix: the assertions previously checked the stale 'logCollectSub'
    # variable from the sections above instead of the subscription that
    # was just created and loaded here.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (
                                                   goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect, workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect for each ALCARECO-stream merge task (same stale-variable
    # fix as above).
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (
                                                  goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    # LogCollect for the DQM harvesting task
    dqmHarvestLogCollect = Fileset(
        name="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/unmerged-logArchive")
    dqmHarvestLogCollect.loadData()
    dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged/RecoMergewrite_DQMMergedEndOfRunDQMHarvestLogCollect")
    dqmHarvestLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")

    return
def _generationTaskTest(self):
    """
    _generationTaskTest_

    Verify that a Generation-type workload installed into WMBS produced the
    expected workflow output maps, filesets and subscriptions for the
    /TestWorkload/Generation task tree (merge, cleanup and log-collect tasks
    included).
    """
    # retrieve task from the installed workflow
    genTask = Workflow(name = "TestWorkload", task = "/TestWorkload/Generation")
    genTask.load()
    # One entry per output module plus the logArchive output.
    self.assertEqual(len(genTask.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")
    # output modules expected from the generation step
    goldenOutputMods = ["OutputA"]
    for o in goldenOutputMods:
        # Each output module maps to a merged and an unmerged fileset.
        mergedOutput = genTask.outputMap[o][0]["merged_output_fileset"]
        unmergedOutput = genTask.outputMap[o][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Generation/unmerged-%s" % o,
                         "Error: Unmerged output fileset is wrong.")
    # logArchive has no separate merged fileset: both entries point at the
    # unmerged logArchive fileset.
    logArchOutput = genTask.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = genTask.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Generation/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Generation/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # Check the merge workflow created for each output module.
    for o in goldenOutputMods:
        mergeTask = Workflow(name = "TestWorkload",
                             task = "/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        self.assertEqual(len(mergeTask.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        # For merge tasks both the merged and unmerged entries resolve to the
        # merged fileset (merge output is merged by definition).
        mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
                         "Error: LogArchive output fileset is wrong.")
    # Top-level production subscription over the injected block fileset.
    topLevelFileset = Fileset(name = "TestWorkload-Generation-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset = topLevelFileset, workflow = genTask)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production",
                     "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algo.")
    # Merge subscriptions: one per unmerged output fileset.
    for o in goldenOutputMods:
        unmergedOutput = Fileset(name = "/TestWorkload/Generation/unmerged-%s" % o)
        unmergedOutput.loadData()
        mergeTask = Workflow(name = "TestWorkload",
                             task = "/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        mergeSubscription = Subscription(fileset = unmergedOutput, workflow = mergeTask)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])
    # Cleanup subscriptions: one per unmerged output fileset.
    for o in goldenOutputMods:
        unmerged = Fileset(name = "/TestWorkload/Generation/unmerged-%s" % o)
        unmerged.loadData()
        cleanupTask = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/Generation/GenerationCleanupUnmerged%s" % o)
        cleanupTask.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupTask)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # Log-collect subscription over the generation task's logArchive fileset.
    genLogCollect = Fileset(name = "/TestWorkload/Generation/unmerged-logArchive")
    genLogCollect.loadData()
    genLogCollectTask = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/Generation/GenLogCollect")
    genLogCollectTask.load()
    logCollectSub = Subscription(fileset = genLogCollect, workflow = genLogCollectTask)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    # Log-collect subscriptions for each merge task's logArchive fileset.
    for o in goldenOutputMods:
        mergeLogCollect = Fileset(name = "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o)
        mergeLogCollect.loadData()
        mergeLogCollectTask = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Generation/GenerationMerge%s/Generation%sMergeLogCollect" % (o, o))
        mergeLogCollectTask.load()
        logCollectSub = Subscription(fileset = mergeLogCollect, workflow = mergeLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
def testMonteCarloFromGEN(self):
    """
    _testMonteCarloFromGEN_

    Create a MonteCarloFromGEN workflow and verify it installs into WMBS
    correctly: processing workflow output maps, merge/cleanup/log-collect
    workflows, and all associated filesets and subscriptions.
    """
    arguments = getTestArguments()
    arguments["ProcConfigCacheID"] = self.injectConfig()
    arguments["CouchDBName"] = "mclhe_t"
    testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
    # Install the workload into WMBS over a single input block.
    testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
    procWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/MonteCarloFromGEN")
    procWorkflow.load()
    # Two data outputs plus logArchive.
    self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")
    self.assertEqual(procWorkflow.wfType, 'lheproduction')
    goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        # Every data output module gets a merged and an unmerged fileset.
        mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
    # logArchive: both map entries point at the unmerged logArchive fileset.
    logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # Check each merge workflow's output map.
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        # Merge output: merged and unmerged entries both resolve to the
        # merged fileset.
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")
    # Top-level subscription over the input block fileset.
    topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
    procSubscription.loadData()
    self.assertEqual(procSubscription["type"], "Production",
                     "Error: Wrong subscription type: %s" % procSubscription["type"])
    self.assertEqual(procSubscription["split_algo"], "LumiBased",
                     "Error: Wrong split algo.")
    # Merge subscription for the RECO output.
    unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
    unmergedReco.loadData()
    recoMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
    recoMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])
    # Merge subscription for the ALCARECO output.
    unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
    unmergedAlca.loadData()
    alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
    alcaMergeWorkflow.load()
    mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                     "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])
    # Cleanup subscriptions for each unmerged output fileset.
    for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
        unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name = "TestWorkload",
                                   task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")
    # Log-collect subscription for the processing task.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    # Log-collect subscription for the RECO merge task.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    # Log-collect subscription for the ALCARECO merge task.
    procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
    procLogCollect.loadData()
    procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
    procLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algo.")
    return
def databaseWork(self):
    """
    _databaseWork_

    Query the DB for all watched workflows and filesets; whenever a
    fileset's name matches a managed workflow's fileset_match regex,
    create the corresponding subscription.

    Two passes are made:
      1. unsubscribed workflows vs. all filesets
      2. managed workflows vs. unsubscribed filesets
    """
    # Get all watched workflows
    availableWorkflows = self.getUnsubscribedWorkflows.execute()
    logging.debug("Found %s unsubscribed managed workflows" \
                  % len(availableWorkflows))
    # Get all filesets to check if they match a workflow
    availableFilesets = self.getAllFilesets.execute()
    logging.debug("Found %s filesets" % len(availableFilesets))
    # Loop on unsubscribed workflows to match filesets
    for managedWorkflow in availableWorkflows:
        # Workflow object cache to pass into Subscription constructor;
        # loaded lazily on the first regex match and reused across filesets.
        wfObj = None
        for fileset in availableFilesets:
            # Fileset object cache (per fileset; loaded only on a match)
            fsObj = None
            # Load the location information
            #whitelist = Set()
            #blacklist = Set()
            # Location is only caf
            #locations = self.queries.getLocations(managedWorkflow['id'])
            #for location in locations:
            #    if bool(int(location['valid'])) == True:
            #        whitelist.add(location['site_name'])
            #    else:
            #        blacklist.add(location['site_name'])
            # Attempt to match workflows to filesets via the stored regex
            if re.match(managedWorkflow['fileset_match'], fileset['name']):
                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (fileset['name'], managedWorkflow['workflow'])
                logging.debug(msg)
                # Match found - Load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id = fileset['id'])
                    fsObj.load()
                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id = managedWorkflow['workflow'])
                    wfObj.load()
                # Create the subscription
                newSub = Subscription(fileset = fsObj, \
                                      workflow = wfObj, \
                                      #whitelist = whitelist, \
                                      #blacklist = blacklist, \
                                      split_algo = managedWorkflow['split_algo'],
                                      type = managedWorkflow['type'])
                newSub.create()
    # Second pass: managed workflows vs. filesets without a subscription.
    managedWorkflows = self.getManagedWorkflows.execute()
    logging.debug("Found %s managed workflows" \
                  % len(managedWorkflows))
    unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
    logging.debug("Found %s unsubscribed filesets" % \
                  len(unsubscribedFilesets))
    # Loop on unsubscribed filesets to match workflows
    for unsubscribedFileset in unsubscribedFilesets:
        # Workflow object cache to pass into Subscription constructor
        # FIXME: this cache is defeated below — wfObj (and fsObj) are reset
        # inside the inner loop, so objects are reloaded on every match.
        wfObj = None
        for managedWork in managedWorkflows:
            logging.debug("The workflow %s" %managedWork['workflow'])
            # Fileset object cache
            wfObj = None
            fsObj = None
            # Load the location information
            #whitelist = Set()
            #blacklist = Set()
            # Location is only caf
            #locations = self.queries.getLocations(managedWorkflow['id'])
            #for location in locations:
            #    if bool(int(location['valid'])) == True:
            #        whitelist.add(location['site_name'])
            #    else:
            #        blacklist.add(location['site_name'])
            # Attempt to match workflows to filesets
            if re.match(managedWork['fileset_match'], \
                        unsubscribedFileset['name']):
                # Log in debug
                msg = "Creating subscription for %s to workflow id %s"
                msg %= (unsubscribedFileset['name'], \
                        managedWork['workflow'])
                logging.debug(msg)
                # Match found - Load the fileset if not already loaded
                if not fsObj:
                    fsObj = Fileset(id = unsubscribedFileset['id'])
                    fsObj.load()
                # Load the workflow if not already loaded
                if not wfObj:
                    wfObj = Workflow(id = managedWork['workflow'])
                    wfObj.load()
                # Create the subscription
                newSub = Subscription(fileset = fsObj, \
                                      workflow = wfObj, \
                                      #whitelist = whitelist, \
                                      #blacklist = blacklist, \
                                      split_algo = managedWork['split_algo'],
                                      type = managedWork['type'])
                newSub.create()
                newSub.load()
def killWorkflows(self, workflows):
    """
    _killWorkflows_

    Delete all the information in couch and WMBS about the given workflows,
    going through all subscriptions and deleting them one by one.

    :param workflows: dictionary keyed by workflow name; each value holds a
        fully loaded WMWorkload ("spec") and a "workflows" dict mapping
        WMBS workflow id -> list of subscription ids.

    Errors for one workflow are logged and alerted but do not stop the
    processing of the remaining workflows.
    """
    for workflow in workflows:
        logging.info("Deleting workflow %s" % workflow)
        try:
            # Get the task-workflow ids, sort them by ID,
            # higher ID first so we kill
            # the leaves of the tree first, root last
            workflowsIDs = workflows[workflow]["workflows"].keys()
            workflowsIDs.sort(reverse=True)
            # Now go through all tasks and load the WMBS workflow objects
            wmbsWorkflows = []
            for wfID in workflowsIDs:
                wmbsWorkflow = Workflow(id=wfID)
                wmbsWorkflow.load()
                wmbsWorkflows.append(wmbsWorkflow)
            # Time to shoot one by one
            for wmbsWorkflow in wmbsWorkflows:
                # Optionally upload publication info before deleting anything
                if self.uploadPublishInfo:
                    self.createAndUploadPublish(wmbsWorkflow)
                # Load all the associated subscriptions and shoot them one by one
                subIDs = workflows[workflow]["workflows"][wmbsWorkflow.id]
                for subID in subIDs:
                    subscription = Subscription(id=subID)
                    subscription['workflow'] = wmbsWorkflow
                    subscription.load()
                    subscription.deleteEverything()
                # Check that the workflow is gone
                if wmbsWorkflow.exists():
                    # Something went bad, this workflow
                    # should be gone by now
                    msg = "Workflow %s, Task %s was not deleted completely" % (
                        wmbsWorkflow.name, wmbsWorkflow.task)
                    raise TaskArchiverPollerException(msg)
                # Now delete directories
                _, taskDir = getMasterName(startDir=self.jobCacheDir,
                                           workflow=wmbsWorkflow)
                logging.info("About to delete work directory %s" % taskDir)
                if os.path.exists(taskDir):
                    if os.path.isdir(taskDir):
                        shutil.rmtree(taskDir)
                    else:
                        # What we think of as a working directory is not a directory
                        # This should never happen and there is no way we can recover
                        # from this here. Bail out now and have someone look at things.
                        msg = "Work directory is not a directory, this should never happen: %s" % taskDir
                        raise TaskArchiverPollerException(msg)
                else:
                    msg = "Attempted to delete work directory but it was already gone: %s" % taskDir
                    logging.debug(msg)
            # Workflow-level cleanup: the spec's sandbox directory.
            spec = workflows[workflow]["spec"]
            topTask = spec.getTopLevelTask()[0]
            # Now take care of the sandbox
            sandbox = getattr(topTask.data.input, 'sandbox', None)
            if sandbox:
                sandboxDir = os.path.dirname(sandbox)
                if os.path.isdir(sandboxDir):
                    shutil.rmtree(sandboxDir)
                    logging.debug("Sandbox dir deleted")
                else:
                    logging.error(
                        "Attempted to delete sandbox dir but it was already gone: %s" % sandboxDir)
        except Exception, ex:
            # Best-effort loop: log + alert, then continue with the next workflow.
            msg = "Critical error while deleting workflow %s\n" % workflow
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(2, msg=msg)
def _checkTask(self, task, taskConf):
    """
    _checkTask_

    Give the provided task instance the once over: verify input-step
    parentage, the task's output fileset map (merged/unmerged), the merge
    workflows attached to each output module, and — if this is a first
    task (has InputDataset or PrimaryDataset) — its top-level subscription.

    :param task: WMTask-like object (uses getPathName()/name()/data.input)
    :param taskConf: dict of task configuration keys
    """
    # Parentage check: a chained task's inputStep must point at the
    # cmsRun1 step of its parent task path.
    if taskConf.has_key("InputTask"):
        inpTask = taskConf['InputTask']
        inpMod = taskConf['InputFromOutputModule']
        inpTaskPath = task.getPathName()
        inpTaskPath = inpTaskPath.replace(task.name(), "")
        inpTaskPath += "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inpTaskPath)
    workflow = Workflow(name = self.workload.name(), task = task.getPathName())
    workflow.load()
    mods = outputModuleList(task)
    for mod in mods:
        filesets = workflow.outputMap[mod][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']
        merged.loadData()
        unmerged.loadData()
        # Expected fileset names; logArchive has no real merged fileset,
        # its "merged" entry points at the unmerged one.
        mergedset = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-Merged"
        if mod == "logArchive":
            mergedset = task.getPathName() + "/unmerged-" + mod
        unmergedset = task.getPathName() + "/unmerged-" + mod
        self.failUnless(mergedset == merged.name)
        self.failUnless(unmergedset == unmerged.name)
        # logArchive modules have no merge task attached; skip the rest.
        if mod == "logArchive":
            continue
        mergeTask = task.getPathName() + "/" + task.name() + "Merge" + mod
        mergeWorkflow = Workflow(name = self.workload.name(), task = mergeTask)
        mergeWorkflow.load()
        if not mergeWorkflow.outputMap.has_key("Merged"):
            msg = "Merge workflow does not contain a Merged output key"
            self.fail(msg)
        mrgFileset = mergeWorkflow.outputMap['Merged'][0]
        mergedFS = mrgFileset['merged_output_fileset']
        unmergedFS = mrgFileset['output_fileset']
        mergedFS.loadData()
        unmergedFS.loadData()
        # For merge tasks, merged and unmerged entries both resolve to the
        # merged fileset (merge output is merged by definition).
        self.assertEqual(mergedFS.name, mergedset)
        self.assertEqual(unmergedFS.name, mergedset)
        mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
        umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
        mrgLogArch.loadData()
        umrgLogArch.loadData()
        archName = task.getPathName() + "/" + task.name() + "Merge" + mod + "/merged-logArchive"
        self.assertEqual(mrgLogArch.name, archName)
        self.assertEqual(umrgLogArch.name,archName)
    #
    # test subscriptions made by this task if it is the first task
    #
    if taskConf.has_key("InputDataset") or taskConf.has_key("PrimaryDataset"):
        taskFileset = Fileset(name = "%s-%s-SomeBlock" % (self.workload.name(), task.name() ))
        taskFileset.loadData()
        taskSubscription = Subscription(fileset = taskFileset, workflow = workflow)
        taskSubscription.loadData()
        if taskConf.has_key("PrimaryDataset"):
            # generator type
            self.assertEqual(taskSubscription["type"], "Production",
                             "Error: Wrong subscription type.")
            self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                             "Error: Wrong split algo.")
        if taskConf.has_key("InputDataset"):
            # processor type
            self.assertEqual(taskSubscription["type"], "Processing",
                             "Wrong subscription type for processing first task")
            self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgorithm'],
                             "Splitting algo mismatch")
def testRepack(self):
    """
    _testRepack_

    Create a Repack workflow and verify it installs into WMBS correctly:
    output fileset maps for the repack and merge workflows, and the
    repack/merge/cleanup/log-collect subscriptions.
    """
    testArguments = RepackWorkloadFactory.getTestArguments()
    testArguments.update(deepcopy(REQUEST))
    factory = RepackWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")
    # Install the workload into WMBS.
    testWMBSHelper = WMBSHelper(testWorkload, "Repack", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)
    repackWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Repack")
    repackWorkflow.load()
    # One entry per configured output plus logArchive.
    self.assertEqual(len(repackWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1,
                     "Error: Wrong number of WF outputs in the Repack WF.")
    # Output module -> data tier; the outputMap key is module + tier.
    goldenOutputMods = {"write_PrimaryDataset1_RAW": "RAW",
                        "write_PrimaryDataset2_RAW": "RAW"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = repackWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = repackWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        if goldenOutputMod != "write_PrimaryDataset1_RAW":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Repack/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)
    # logArchive: both map entries point at the unmerged logArchive fileset.
    logArchOutput = repackWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = repackWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Repack/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    # Check each merge workflow's output map.
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Repack/RepackMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        # Merge output: merged and unmerged entries both resolve to the
        # merged fileset.
        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")
    # Top-level repack subscription.
    topLevelFileset = Fileset(name="TestWorkload-Repack")
    topLevelFileset.loadData()
    repackSubscription = Subscription(fileset=topLevelFileset, workflow=repackWorkflow)
    repackSubscription.loadData()
    self.assertEqual(repackSubscription["type"], "Repack",
                     "Error: Wrong subscription type.")
    self.assertEqual(repackSubscription["split_algo"], "Repack",
                     "Error: Wrong split algorithm. %s" % repackSubscription["split_algo"])
    # Merge subscriptions over the unmerged output filesets.
    unmergedOutputs = {"write_PrimaryDataset1_RAW": "RAW",
                       "write_PrimaryDataset2_RAW": "RAW"}
    for unmergedOutput, tier in unmergedOutputs.items():
        fset = unmergedOutput + tier
        unmergedDataTier = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Repack/RepackMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "RepackMerge",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
    # Cleanup subscriptions over the unmerged output filesets.
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        unmergedFileset = Fileset(name="/TestWorkload/Repack/unmerged-%s" % fset)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Repack/RepackCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")
    # Log-collect subscription for the repack task.
    repackLogCollect = Fileset(name="/TestWorkload/Repack/unmerged-logArchive")
    repackLogCollect.loadData()
    repackLogCollectWorkflow = Workflow(name="TestWorkload",
                                        task="/TestWorkload/Repack/LogCollect")
    repackLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=repackLogCollect, workflow=repackLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")
    # Log-collect subscriptions for each merge task.
    for goldenOutputMod, tier in goldenOutputMods.items():
        repackMergeLogCollect = Fileset(
            name="/TestWorkload/Repack/RepackMerge%s/merged-logArchive" % goldenOutputMod)
        repackMergeLogCollect.loadData()
        repackMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                 task="/TestWorkload/Repack/RepackMerge%s/Repack%sMergeLogCollect" % (
                                                     goldenOutputMod, goldenOutputMod))
        repackMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=repackMergeLogCollect,
                                              workflow=repackMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        # Bug fix: previously these assertions tested the stale logCollectSub
        # from the top-level LogCollect check above, so the per-merge-task
        # subscriptions were never actually verified.
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")
    return
def testPromptRecoWithSkims(self):
    """
    _testT1PromptRecoWithSkim_

    Create a T1 Prompt Reconstruction workflow with PromptSkims and verify
    that it installs into WMBS correctly: workflows, output filesets,
    merge/cleanup/log-collect subscriptions and their split algorithms.

    Fixes two latent bugs in the original test:
      * the prompt-skim cleanup loop used the stale ``unmergedOutput``
        variable instead of the loop variable ``goldenOutputMod``, so it
        verified only one output module five times over;
      * the three merge-log-collect loops loaded ``logCollectSubscription``
        but asserted on the stale ``logCollectSub`` from an earlier block,
        so those loops verified nothing.
    """
    self.setupPromptSkimConfigObject()
    testArguments = getTestArguments()
    testArguments["PromptSkims"] = [self.promptSkim]
    testArguments["CouchURL"] = os.environ["COUCHURL"]
    testArguments["CouchDBName"] = "promptreco_t"
    testArguments["EnvPath"] = os.environ.get("EnvPath", None)
    testArguments["BinPath"] = os.environ.get("BinPath", None)

    testWorkload = promptrecoWorkload("TestWorkload", testArguments)
    testWorkload.setSpecUrl("somespec")
    testWorkload.setOwnerDetails("*****@*****.**", "T0")

    testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Top-level Reco workflow: one output per write tier plus logArchive.
    recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
    recoWorkflow.load()
    self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                     "Error: Wrong number of WF outputs in the Reco WF.")

    goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # write_ALCARECO feeds the AlcaSkim task instead of a plain merge,
        # so its merged fileset name does not follow the RecoMerge pattern.
        if goldenOutputMod != "write_ALCARECO":
            self.assertEqual(mergedOutput.name,
                             "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # AlcaSkim workflow: one output per configured skim plus logArchive.
    alcaSkimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/AlcaSkim")
    alcaSkimWorkflow.load()
    self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                     "Error: Wrong number of WF outputs in the AlcaSkim WF.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # PromptSkim workflow hanging off the merged RECO output: five fake
    # skim outputs plus logArchive.
    promptSkimWorkflow = Workflow(name="TestWorkload",
                                  task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
    promptSkimWorkflow.load()
    self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                     "Error: Wrong number of WF outputs.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
        unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(logArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name,
                     "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Merge workflows for the plain Reco outputs (ALCARECO is skimmed, not merged).
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows for the AlcaSkim outputs.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Merge workflows for the prompt-skim outputs.
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % goldenOutputMod)
        mergeWorkflow.load()
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))
        mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged" % goldenOutputMod,
                         "Error: Unmerged output fileset is wrong.")
        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # Top-level processing subscription.
    topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
    topLevelFileset.loadData()
    recoSubscription = Subscription(fileset=topLevelFileset, workflow=recoWorkflow)
    recoSubscription.loadData()
    self.assertEqual(recoSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(recoSubscription["split_algo"], "EventBased",
                     "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

    # AlcaSkim subscription over the unmerged ALCARECO output.
    alcaRecoFileset = Fileset(name="/TestWorkload/Reco/unmerged-write_ALCARECO")
    alcaRecoFileset.loadData()
    alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow)
    alcaSkimSubscription.loadData()
    self.assertEqual(alcaSkimSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

    # PromptSkim subscription over the merged RECO output.
    mergedRecoFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
    mergedRecoFileset.loadData()
    promptSkimSubscription = Subscription(fileset=mergedRecoFileset, workflow=promptSkimWorkflow)
    promptSkimSubscription.loadData()
    self.assertEqual(promptSkimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(promptSkimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algorithm. %s" % promptSkimSubscription["split_algo"])

    # Merge subscriptions for the plain Reco outputs.
    unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
    for unmergedOutput in unmergedOutputs:
        unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
        unmergedDataTier.loadData()
        dataTierMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
        dataTierMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the AlcaSkim outputs.
    unmergedOutputs = []
    for alcaProd in testArguments["AlcaSkims"]:
        unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
    for unmergedOutput in unmergedOutputs:
        unmergedAlcaSkim = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
        unmergedAlcaSkim.loadData()
        alcaSkimMergeWorkflow = Workflow(name="TestWorkload",
                                         task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
        alcaSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlcaSkim, workflow=alcaSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Merge subscriptions for the prompt-skim outputs.
    unmergedOutputs = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                       "fakeSkimOut4", "fakeSkimOut5"]
    for unmergedOutput in unmergedOutputs:
        unmergedPromptSkim = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % unmergedOutput)
        unmergedPromptSkim.loadData()
        promptSkimMergeWorkflow = Workflow(name="TestWorkload",
                                           task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s" % unmergedOutput)
        promptSkimMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedPromptSkim, workflow=promptSkimMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

    # Cleanup subscriptions for the Reco outputs.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Cleanup subscriptions for the AlcaSkim outputs.
    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong subscription type.")

    # Cleanup subscriptions for the prompt-skim outputs.
    # BUG FIX: the original built names from the stale `unmergedOutput`
    # variable instead of `goldenOutputMod`, checking one output 5 times.
    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        unmergedFileset = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s" % goldenOutputMod)
        unmergedFileset.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s" % goldenOutputMod)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algorithm. %s" % cleanupSubscription["split_algo"])

    # Top-level log-collect subscriptions.
    recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
    recoLogCollect.loadData()
    recoLogCollectWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco/LogCollect")
    recoLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=recoLogCollect, workflow=recoLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
    alcaSkimLogCollect.loadData()
    alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
    alcaSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    promptSkimLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive")
    promptSkimLogCollect.loadData()
    promptSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                            task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect")
    promptSkimLogCollectWorkflow.load()
    logCollectSub = Subscription(fileset=promptSkimLogCollect, workflow=promptSkimLogCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect",
                     "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                     "Error: Wrong split algorithm.")

    # Per-merge-task log-collect subscriptions.
    # BUG FIX (all three loops below): the original asserted on the stale
    # `logCollectSub` from the blocks above instead of the
    # `logCollectSubscription` it had just loaded, verifying nothing.
    goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
    for goldenOutputMod in goldenOutputMods:
        recoMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
        recoMergeLogCollect.loadData()
        recoMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        recoMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=recoMergeLogCollect,
                                              workflow=recoMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    goldenOutputMods = []
    for alcaProd in testArguments["AlcaSkims"]:
        goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
    for goldenOutputMod in goldenOutputMods:
        alcaSkimLogCollect = Fileset(name="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name="TestWorkload",
                                              task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        alcaSkimLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=alcaSkimLogCollect,
                                              workflow=alcaSkimLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    goldenOutputMods = ["fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3",
                        "fakeSkimOut4", "fakeSkimOut5"]
    for goldenOutputMod in goldenOutputMods:
        promptSkimMergeLogCollect = Fileset(name="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive" % goldenOutputMod)
        promptSkimMergeLogCollect.loadData()
        promptSkimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                                     task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
        promptSkimMergeLogCollectWorkflow.load()
        logCollectSubscription = Subscription(fileset=promptSkimMergeLogCollect,
                                              workflow=promptSkimMergeLogCollectWorkflow)
        logCollectSubscription.loadData()
        self.assertEqual(logCollectSubscription["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSubscription["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Inject a test workload, truncate it at its merge task into a Resubmit
    workload, re-inject that, and verify the resulting workflows, output
    filesets and subscriptions in WMBS.
    """
    # Register two sites so subscription creation has somewhere to run.
    resourceControl = ResourceControl()
    for site, se in (("site1", "goodse.cern.ch"), ("site2", "goodse2.cern.ch")):
        resourceControl.insertSite(siteName=site, seName=se,
                                   ceName=site, plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath=self.workDir)
    testWMBSHelper.createSubscription()

    # Truncate at the merge task and inject the resulting Resubmit workload.
    testWorkload.truncate("ResubmitTestWorkload",
                          "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")
    testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath=self.workDir)
    testResubmitWMBSHelper.createSubscription()

    # The truncated merge task becomes the new top-level workflow.
    mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                             task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup task produces no outputs of its own.
    cleanupWorkflow = Workflow(name="ResubmitTestWorkload",
                               task="/ResubmitTestWorkload/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec,
                     os.path.join(self.workDir, cleanupWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name="ResubmitTestWorkload",
                            task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    for fileset in (mergedSkimOutputA, mergedSkimOutputB,
                    unmergedSkimOutputA, unmergedSkimOutputB):
        fileset.loadData()

    # Skim outputs of a truncated workload stay unmerged on both sides.
    self.assertEqual(mergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testCreateSubscription(self):
    """
    _testCreateSubscription_

    Verify that the subscription creation code works correctly: workflows,
    output filesets, site white/black lists and split algorithms all end
    up as expected in WMBS.
    """
    # Register two sites; the workload white-lists site1 only.
    resourceControl = ResourceControl()
    for site, se in (("site1", "goodse.cern.ch"), ("site2", "goodse2.cern.ch")):
        resourceControl.insertSite(siteName=site, seName=se,
                                   ceName=site, plugin="TestPlugin")

    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(),
                                "SomeBlock", cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask,
                                              testWMBSHelper.topLevelFileset)

    # Processing workflow and its single output.
    procWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/ProcessingTask")
    procWorkflow.load()
    self.assertEqual(procWorkflow.owner, "sfoulkes",
                     "Error: Wrong owner: %s" % procWorkflow.owner)
    self.assertEqual(procWorkflow.group, "DMWM",
                     "Error: Wrong group: %s" % procWorkflow.group)
    self.assertEqual(procWorkflow.wfType, "TestReReco", "Error: Wrong type.")
    self.assertEqual(procWorkflow.spec,
                     os.path.join(self.workDir, procWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
    unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]
    mergedProcOutput.loadData()
    unmergedProcOutput.loadData()
    self.assertEqual(mergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedProcOutput.name,
                     "/TestWorkload/ProcessingTask/unmerged-OutputA",
                     "Error: Unmerged output fileset is wrong.")

    # Merge workflow: one output.
    mergeWorkflow = Workflow(name="TestWorkload",
                             task="/TestWorkload/ProcessingTask/MergeTask")
    mergeWorkflow.load()
    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec,
                     os.path.join(self.workDir, mergeWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    # Cleanup workflow: no outputs.
    cleanupWorkflow = Workflow(name="TestWorkload",
                               task="/TestWorkload/ProcessingTask/CleanupTask")
    cleanupWorkflow.load()
    self.assertEqual(cleanupWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(cleanupWorkflow.spec,
                     os.path.join(self.workDir, cleanupWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name,
                     "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    # Skim workflow: two outputs, neither merged.
    skimWorkflow = Workflow(name="TestWorkload",
                            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
    skimWorkflow.load()
    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec,
                     os.path.join(self.workDir, skimWorkflow.name,
                                  "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]
    for fileset in (mergedSkimOutputA, mergedSkimOutputB,
                    unmergedSkimOutputA, unmergedSkimOutputB):
        fileset.loadData()
    self.assertEqual(mergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name,
                     "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    # Top-level subscription: site1 white-listed, site2 black-listed.
    topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
    topLevelFileset.loadData()
    procSubscription = Subscription(fileset=topLevelFileset, workflow=procWorkflow)
    procSubscription.loadData()
    self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                     "Error: Wrong site white/black list for proc sub.")
    for site in procSubscription.getWhiteBlackList():
        if site["site_name"] == "site1":
            self.assertEqual(site["valid"], 1, "Error: Site should be white listed.")
        else:
            self.assertEqual(site["valid"], 0, "Error: Site should be black listed.")
    self.assertEqual(procSubscription["type"], "Processing",
                     "Error: Wrong subscription type.")
    self.assertEqual(procSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    mergeSubscription = Subscription(fileset=unmergedProcOutput, workflow=mergeWorkflow)
    mergeSubscription.loadData()
    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()
    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
    def testPrivateMC(self):
        """
        _testPrivateMC_

        Create a PrivateMC workload and verify it installs into WMBS
        correctly: the top-level workflow, its output filesets, the
        top-level subscription and the log-collect subscription.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "privatemc_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 1

        processingFactory = PrivateMCWorkloadFactory()
        testWorkload = processingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/PrivateMC")
        procWorkflow.load()

        # Expect exactly 3 outputs: OutputA, OutputB and logArchive.
        self.assertEqual(
            len(procWorkflow.outputMap.keys()), 3,
            "Error: Wrong number of WF outputs: %s" %
            len(procWorkflow.outputMap.keys()))

        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]  # Actually Analysis does not have a merge task
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        # With no merge task, "merged" and unmerged log-archive filesets both
        # resolve to the unmerged location.
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            # Both point at unmerged-<module>: PrivateMC has no merge step.
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-PrivateMC-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "PrivateMC",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        # The log-collect task subscribes to the unmerged log archive.
        procLogCollect = Fileset(
            name="/TestWorkload/PrivateMC/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload", task="/TestWorkload/PrivateMC/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This tests run on unmerged RECO output
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
        dataProcArguments["ProcessingString"] = "ProcString"
        dataProcArguments["ConfigCacheID"] = recoConfig
        # Attach one skim that consumes the (transient) RECOoutput module.
        dataProcArguments.update({
            "SkimName1": "SomeSkim",
            "SkimInput1": "RECOoutput",
            "Skim1ConfigCacheID": skimConfig
        })
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        # RECOoutput is transient: produced and skimmed, but never merged.
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
        dataProcArguments["EnableHarvesting"] = True
        dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

        factory = ReRecoWorkloadFactory()
        testWorkload = factory.factoryWorkloadConstruction(
            "TestWorkload", dataProcArguments)

        # Sanity-check that the ProcessingString made it into the merged LFN.
        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
            SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
            Merged.mergedLFNBase,
                         '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "DataProcessing",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(
            testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        # SkimA + SkimB + logArchive = 3 outputs.
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" %
                goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        # Each skim output has its own merge workflow with Merged + logArchive.
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        # The skim feeds off the *unmerged* RECO output, since RECOoutput is
        # declared transient above.
        topLevelFileset = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        # Merge subscriptions on the unmerged skim outputs.
        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s"
                % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged,
                                             workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"],
                             "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        # Cleanup subscriptions on the same unmerged skim outputs.
        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s"
                % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        # Log-collect subscriptions on the merge tasks' log archives.
        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive"
                % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect"
                % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                         workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first.

        Also builds self.stateMap / self.orderedStates describing which
        subscriptions must finish before each request state is reached.
        """
        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()
        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName,
                                                       testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask,
                                              wmbsHelper.topLevelFileset)

        # Map of state name -> subscriptions that gate entering that state.
        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting',
                              'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        # '%s' placeholders are filled with topLevelTask in the loop below.
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                # [13:] strips the 13-char 'AlcaSkimMerge' prefix, leaving the
                # ALCARECO stream name.
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                # Task names look like 'RecoMergewrite_<TIER>'; the fileset is
                # 'unmerged-write_<TIER><TIER>'.
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return
def testExpress(self): """ _testExpress_ Create an Express workflow and verify it installs into WMBS correctly. """ testArguments = ExpressWorkloadFactory.getTestArguments() testArguments.update(deepcopy(REQUEST)) testArguments['RecoCMSSWVersion'] = "CMSSW_9_0_0" factory = ExpressWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction( "TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Express", cachepath=self.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS( testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) expressWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Express") expressWorkflow.load() self.assertEqual( len(expressWorkflow.outputMap.keys()), len(testArguments["Outputs"]) + 1, "Error: Wrong number of WF outputs in the Express WF.") goldenOutputMods = { "write_PrimaryDataset1_FEVT": "FEVT", "write_StreamExpress_ALCARECO": "ALCARECO", "write_StreamExpress_DQMIO": "DQMIO" } for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier mergedOutput = expressWorkflow.outputMap[fset][0][ "merged_output_fileset"] unmergedOutput = expressWorkflow.outputMap[fset][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_StreamExpress_ALCARECO": self.assertEqual( mergedOutput.name, "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/Express/unmerged-%s" % fset, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = expressWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = expressWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() 
self.assertEqual(logArchOutput.name, "/TestWorkload/Express/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Express/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO" ) alcaSkimWorkflow.load() self.assertEqual( len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = { "ALCARECOStreamPromptCalibProd": "ALCAPROMPT", "ALCARECOStreamTkAlMinBias": "ALCARECO" } for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier mergedOutput = alcaSkimWorkflow.outputMap[fset][0][ "merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[fset][0][ "output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual( mergedOutput.name, "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual( unmergedOutput.name, "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-%s" % fset, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for dqmString in [ 
"ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged", "ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged" ]: dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s" % dqmString dqmWorkflow = Workflow(name="TestWorkload", task=dqmTask) dqmWorkflow.load() logArchOutput = dqmWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "%s/unmerged-logArchive" % dqmTask, "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "%s/unmerged-logArchive" % dqmTask, "Error: LogArchive output fileset is wrong.") goldenOutputMods = { "write_PrimaryDataset1_FEVT": "FEVT", "write_StreamExpress_DQMIO": "DQMIO" } for goldenOutputMod, tier in goldenOutputMods.items(): mergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Express/ExpressMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap[ "Merged%s" % tier][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap[ "Merged%s" % tier][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual( mergedMergeOutput.name, "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Merged output fileset is wrong.") self.assertEqual( unmergedMergeOutput.name, "/TestWorkload/Express/ExpressMerge%s/merged-Merged%s" % (goldenOutputMod, tier), "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][ "output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual( logArchOutput.name, 
"/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual( unmergedLogArchOutput.name, "/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name="TestWorkload-Express") topLevelFileset.loadData() expressSubscription = Subscription(fileset=topLevelFileset, workflow=expressWorkflow) expressSubscription.loadData() self.assertEqual(expressSubscription["type"], "Express", "Error: Wrong subscription type.") self.assertEqual( expressSubscription["split_algo"], "Express", "Error: Wrong split algorithm. %s" % expressSubscription["split_algo"]) alcaRecoFileset = Fileset( name= "/TestWorkload/Express/unmerged-write_StreamExpress_ALCARECOALCARECO" ) alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset=alcaRecoFileset, workflow=alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Express", "Error: Wrong subscription type.") self.assertEqual( alcaSkimSubscription["split_algo"], "ExpressMerge", "Error: Wrong split algorithm. 
%s" % alcaSkimSubscription["split_algo"]) mergedDQMFileset = Fileset( name= "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/merged-MergedDQMIO" ) mergedDQMFileset.loadData() dqmSubscription = Subscription(fileset=mergedDQMFileset, workflow=dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") unmergedOutputs = { "write_PrimaryDataset1_FEVT": "FEVT", "write_StreamExpress_DQMIO": "DQMIO" } for unmergedOutput, tier in unmergedOutputs.items(): fset = unmergedOutput + tier unmergedDataTier = Fileset( name="/TestWorkload/Express/unmerged-%s" % fset) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Express/ExpressMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset=unmergedDataTier, workflow=dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual( mergeSubscription["split_algo"], "ExpressMerge", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) goldenOutputMods = { "write_PrimaryDataset1_FEVT": "FEVT", "write_StreamExpress_ALCARECO": "ALCARECO", "write_StreamExpress_DQMIO": "DQMIO" } for goldenOutputMod, tier in goldenOutputMods.items(): fset = goldenOutputMod + tier unmergedFileset = Fileset( name="/TestWorkload/Express/unmerged-%s" % fset) unmergedFileset.loadData() cleanupWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Express/ExpressCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset=unmergedFileset, workflow=cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") expressLogCollect = Fileset( name="/TestWorkload/Express/unmerged-logArchive") expressLogCollect.loadData() expressLogCollectWorkflow = Workflow( name="TestWorkload", task="/TestWorkload/Express/ExpressLogCollect") expressLogCollectWorkflow.load() logCollectSub = Subscription(fileset=expressLogCollect, workflow=expressLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset( name= "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/unmerged-logArchive" ) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Express/ExpressAlcaSkimwrite_StreamExpress_ALCARECO/AlcaSkimLogCollect" ) alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset=alcaSkimLogCollect, workflow=alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", 
"Error: Wrong split algorithm.") goldenOutputMods = [ "write_PrimaryDataset1_FEVT", "write_StreamExpress_DQMIO" ] for goldenOutputMod in goldenOutputMods: expressMergeLogCollect = Fileset( name="/TestWorkload/Express/ExpressMerge%s/merged-logArchive" % goldenOutputMod) expressMergeLogCollect.loadData() expressMergeLogCollectWorkflow = Workflow( name="TestWorkload", task= "/TestWorkload/Express/ExpressMerge%s/Express%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) expressMergeLogCollectWorkflow.load() logCollectSubscription = Subscription( fileset=expressMergeLogCollect, workflow=expressMergeLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") for dqmStrings in [ ("ExpressMergewrite_StreamExpress_DQMIOEndOfRunDQMHarvestMerged", "ExpressMergewrite_StreamExpress_DQMIOMergedEndOfRunDQMHarvestLogCollect" ), ("ExpressMergewrite_StreamExpress_DQMIOPeriodicDQMHarvestMerged", "ExpressMergewrite_StreamExpress_DQMIOMergedPeriodicDQMHarvestLogCollect" ) ]: dqmFileset = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/unmerged-logArchive" % \ dqmStrings[0] dqmHarvestLogCollect = Fileset(name=dqmFileset) dqmHarvestLogCollect.loadData() dqmTask = "/TestWorkload/Express/ExpressMergewrite_StreamExpress_DQMIO/%s/%s" % dqmStrings dqmHarvestLogCollectWorkflow = Workflow(name="TestWorkload", task=dqmTask) dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset=dqmHarvestLogCollect, workflow=dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly. Check that we can drop an output module.
        """
        testArgs = getTestArguments()
        # RECOoutput is transient: produced, but its merge task is dropped.
        testArgs['TransientOutputModules'] = ['RECOoutput']
        testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock",
                                    cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask,
                                                  testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        # RECOoutput + ALCARECOoutput + logArchive = 3 outputs.
        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            # For transient modules there is no merge task, so the "merged"
            # fileset is the unmerged one.
            if goldenOutputMod in testArgs["TransientOutputModules"]:
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            else:
                self.assertEqual(mergedOutput.name,
                                 "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name,
                             "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        # Verify the merge workflows for the non-transient outputs only.
        for goldenOutputMod in goldenOutputMods:
            if goldenOutputMod in testArgs["TransientOutputModules"]:
                # No merge for this output module
                continue
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name,
                             "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset,
                                        workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        # No merge workflow should exist in WMBS
        self.assertRaises(IndexError, recoMergeWorkflow.load)

        unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca,
                                         workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        # Cleanup subscriptions exist for both unmerged outputs, transient or
        # not.
        for procOutput in ["RECOoutput", "ALCARECOoutput"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged,
                                               workflow = cleanupWorkflow)
            cleanupSubscription.loadData()
            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect,
                                     workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect,
                                     workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
def testCreateLoadWithUserAtt(self):
    """
    Create a workflow that carries VO group/role ownership attributes and
    verify it can be loaded back via each supported load path:

      - Workflow.LoadFromName   (name + task)
      - Workflow.LoadFromID     (database id)
      - Workflow.LoadFromSpecOwner (spec + owner + task)

    Finally create a second, fully distinct workflow and check that its
    identifying attributes really differ from the first one.
    """
    original = Workflow(spec="spec.xml", owner="Simon",
                        owner_vogroup='integration', owner_vorole='priority',
                        name="wf001", task='Test', wfType="ReReco")
    original.create()

    # Attributes that must round-trip unchanged through every load method.
    roundTripAttrs = ("id", "name", "spec", "task", "wfType", "owner")

    loadedByName = Workflow(name="wf001", task='Test')
    loadedByName.load()
    self.assertTrue(all(getattr(original, attr) == getattr(loadedByName, attr)
                        for attr in roundTripAttrs),
                    "ERROR: Workflow.LoadFromName Failed")

    loadedById = Workflow(id=original.id)
    loadedById.load()
    self.assertTrue(all(getattr(original, attr) == getattr(loadedById, attr)
                        for attr in roundTripAttrs),
                    "ERROR: Workflow.LoadFromID Failed")

    loadedBySpecOwner = Workflow(spec="spec.xml", owner="Simon", task='Test')
    loadedBySpecOwner.load()
    self.assertTrue(all(getattr(original, attr) == getattr(loadedBySpecOwner, attr)
                        for attr in roundTripAttrs),
                    "ERROR: Workflow.LoadFromSpecOwner Failed")

    # A second workflow with different identifiers, group and role must not
    # collide with the first; only type and owner are shared here.
    other = Workflow(spec="spec_1.xml", owner="Simon",
                     owner_vogroup='t1access', owner_vorole='t1access',
                     name="wf002", task='Test_1', wfType="ReReco")
    other.create()

    distinctAttrs = ("id", "name", "spec", "vogroup", "vorole", "task")
    self.assertTrue(all(getattr(other, attr) != getattr(original, attr)
                        for attr in distinctAttrs) and
                    (other.wfType == original.wfType) and
                    (other.owner == original.owner),
                    "ERROR: Workflow.LoadFromSpecOwner Failed")

    original.delete()
    return
def pollSubscriptions(self):
    """
    Poller for looking in all active subscriptions for jobs that need to be made.

    For every subscription id returned by the subscriptionList DAO:
    load the subscription and its workflow, fetch the pickled workload
    (sandbox), build the configured job-splitting algorithm, then iterate
    the splitter inside per-batch database transactions: run each job
    group through creatorProcess, bulk-store the job cache directories,
    and advance the groups through advanceJobGroup.  Subscriptions that
    cannot be loaded or have no task/sandbox are logged and skipped, not
    failed.
    """
    logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
    myThread = threading.currentThread()

    # First, get list of Subscriptions
    subscriptions = self.subscriptionList.execute()

    # Okay, now we have a list of subscriptions
    for subscriptionID in subscriptions:
        wmbsSubscription = Subscription(id=subscriptionID)
        try:
            wmbsSubscription.load()
        except IndexError:
            # This happens when the subscription no longer exists
            # i.e., someone executed a kill() function on the database
            # while the JobCreator was in cycle
            # Ignore this subscription
            msg = "JobCreator cannot load subscription %i" % subscriptionID
            logging.error(msg)
            continue

        workflow = Workflow(id=wmbsSubscription["workflow"].id)
        workflow.load()
        # Replace the stub workflow on the subscription with the fully
        # loaded one so downstream code sees name/task/spec.
        wmbsSubscription['workflow'] = workflow
        wmWorkload = retrieveWMSpec(workflow=workflow)

        if not workflow.task or not wmWorkload:
            # Then we have a problem
            # We NEED a sandbox
            # Abort this subscription!
            # But do NOT fail
            # We have no way of marking a subscription as bad per se
            # We'll have to just keep skipping it
            msg = "Have no task for workflow %i\n" % (workflow.id)
            msg += "Aborting Subscription %i" % (subscriptionID)
            logging.error(msg)
            continue

        logging.debug("Have loaded subscription %i with workflow %i\n", subscriptionID, workflow.id)

        # retrieve information from the workload to propagate down to the job configuration
        allowOpport = wmWorkload.getAllowOpportunistic()

        # Set task object
        wmTask = wmWorkload.getTaskByPath(workflow.task)

        # Get generators
        # If you fail to load the generators, pass on the job
        try:
            if hasattr(wmTask.data, 'generators'):
                manager = GeneratorManager(wmTask)
                seederList = manager.getGeneratorList()
            else:
                seederList = []
        except Exception as ex:
            msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
            msg += "Exception: %s\n" % str(ex)
            msg += "Passing over this error. It will reoccur next interation!\n"
            msg += "Please check or remove this subscription!\n"
            logging.error(msg)
            continue

        logging.debug("Going to call wmbsJobFactory for sub %i with limit %i", subscriptionID, self.limit)

        splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
        logging.debug("Split Params: %s", splitParams)

        # Load the proper job splitting module
        splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
        # and return an instance of the splitting algorithm
        wmbsJobFactory = splitterFactory(package="WMCore.WMBS",
                                         subscription=wmbsSubscription,
                                         generators=seederList,
                                         limit=self.limit)

        # Turn on the jobFactory --> get available files for that subscription, keep result proxies
        wmbsJobFactory.open()

        # Create a function to hold it, calling __call__ from the JobFactory
        # which then calls algorithm method of the job splitting algo instance
        jobSplittingFunction = runSplitter(jobFactory=wmbsJobFactory,
                                           splitParams=splitParams)

        # Now we get to find out how many jobs there are.
        # jobNumber is the running counter used to number newly created jobs.
        jobNumber = self.countJobs.execute(workflow=workflow.id,
                                           conn=myThread.transaction.conn,
                                           transaction=True)
        jobNumber += splitParams.get('initial_lfn_counter', 0)
        logging.debug("Have %i jobs for workflow %s already in database.", jobNumber, workflow.name)

        continueSubscription = True
        while continueSubscription:
            # This loop runs over the jobFactory,
            # using yield statements and a pre-existing proxy to
            # generate and process new jobs

            # First we need the jobs.
            # Each batch of job groups is created and committed inside its
            # own transaction so a failure rolls back only this batch.
            myThread.transaction.begin()
            try:
                wmbsJobGroups = next(jobSplittingFunction)
                logging.info("Retrieved %i jobGroups from jobSplitter", len(wmbsJobGroups))
            except StopIteration:
                # If you receive a stopIteration, we're done
                logging.info("Completed iteration over subscription %i", subscriptionID)
                continueSubscription = False
                myThread.transaction.commit()
                break

            # If we have no jobGroups, we're done
            if len(wmbsJobGroups) == 0:
                logging.info("Found end in iteration over subscription %i", subscriptionID)
                continueSubscription = False
                myThread.transaction.commit()
                break

            # Assemble a dict of all the info
            processDict = {'workflow': workflow,
                           'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                           'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                           'owner': wmWorkload.getOwner().get('name', None),
                           'ownerDN': wmWorkload.getOwner().get('dn', None),
                           'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                           'ownerRole': wmWorkload.getOwner().get('vorole', ''),
                           'numberOfCores': 1,
                           'inputDataset': wmTask.getInputDatasetPath(),
                           'inputPileup': wmTask.getInputPileupDatasets()}
            try:
                # Use the largest core count requested by any step of the task.
                maxCores = 1
                stepNames = wmTask.listAllStepNames()
                for stepName in stepNames:
                    sh = wmTask.getStep(stepName)
                    maxCores = max(maxCores, sh.getNumberOfCores())
                processDict.update({'numberOfCores': maxCores})
            except AttributeError:
                # Older step helpers may lack getNumberOfCores; keep the
                # default of 1 core in that case.
                logging.info("Failed to read multicore settings from task %s", wmTask.getPathName())

            tempSubscription = Subscription(id=wmbsSubscription['id'])

            # if we have glideinWMS constraints, then adapt all jobs
            if self.glideinLimits:
                capResourceEstimates(wmbsJobGroups, self.glideinLimits)

            nameDictList = []
            for wmbsJobGroup in wmbsJobGroups:
                # For each jobGroup, put a dictionary
                # together and run it with creatorProcess
                jobsInGroup = len(wmbsJobGroup.jobs)
                wmbsJobGroup.subscription = tempSubscription
                tempDict = {}
                tempDict.update(processDict)
                tempDict['jobGroup'] = wmbsJobGroup
                tempDict['swVersion'] = wmTask.getSwVersion(allSteps=True)
                tempDict['scramArch'] = wmTask.getScramArch()
                tempDict['jobNumber'] = jobNumber
                tempDict['agentNumber'] = self.agentNumber
                tempDict['agentName'] = self.agentName
                tempDict['inputDatasetLocations'] = wmbsJobGroup.getLocationsForJobs()
                tempDict['allowOpportunistic'] = allowOpport

                jobGroup = creatorProcess(work=tempDict,
                                          jobCacheDir=self.jobCacheDir)
                jobNumber += jobsInGroup

                # Set jobCache for group
                for job in jobGroup.jobs:
                    nameDictList.append({'jobid': job['id'],
                                         'cacheDir': job['cache_dir']})
                    job["user"] = wmWorkload.getOwner()["name"]
                    job["group"] = wmWorkload.getOwner()["group"]

            # Set the caches in the database
            try:
                if len(nameDictList) > 0:
                    self.setBulkCache.execute(jobDictList=nameDictList,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            except WMException:
                # WMExceptions are already meaningful; re-raise untouched.
                raise
            except Exception as ex:
                msg = "Unknown exception while setting the bulk cache:\n"
                msg += str(ex)
                logging.error(msg)
                logging.debug("Error while setting bulkCache with following values: %s\n", nameDictList)
                raise JobCreatorException(msg)

            # Advance the jobGroup in changeState
            for wmbsJobGroup in wmbsJobGroups:
                self.advanceJobGroup(wmbsJobGroup=wmbsJobGroup)

            # Now end the transaction so that everything is wrapped
            # in a single rollback
            myThread.transaction.commit()

        # END: While loop over jobFactory

        # Close the jobFactory
        wmbsJobFactory.close()

    return
def testOutput(self):
    """
    Create a workflow, attach output filesets to it, and verify the output
    map is stored to and loaded back from the database correctly:
    two modules, with "outModOne" mapping to two fileset pairs.
    """
    # Build the unmerged/merged fileset pairs up front.  Creation order
    # matches the original test: all unmerged first, then all merged.
    unmergedFilesets = {}
    mergedFilesets = {}
    for label in ("A", "B", "C"):
        unmergedFilesets[label] = Fileset(name="testFileset%s" % label)
        mergedFilesets[label] = Fileset(name="testMergedFileset%s" % label)
    for label in ("A", "B", "C"):
        unmergedFilesets[label].create()
    for label in ("A", "B", "C"):
        mergedFilesets[label].create()

    writerWorkflow = Workflow(spec="spec.xml", owner="Simon",
                              name="wf001", task='Test')
    writerWorkflow.create()

    # A freshly loaded copy must have an empty output map before any
    # outputs are assigned.
    emptyLoad = Workflow(name="wf001", task='Test')
    emptyLoad.load()
    self.assertEqual(len(emptyLoad.outputMap.keys()), 0,
                     "ERROR: Output map exists before output is assigned")

    writerWorkflow.addOutput("outModOne", unmergedFilesets["A"], mergedFilesets["A"])
    writerWorkflow.addOutput("outModOne", unmergedFilesets["C"], mergedFilesets["C"])
    writerWorkflow.addOutput("outModTwo", unmergedFilesets["B"], mergedFilesets["B"])

    reloaded = Workflow(name="wf001", task='Test')
    reloaded.load()

    self.assertEqual(len(reloaded.outputMap.keys()), 2,
                     "ERROR: Incorrect number of outputs in output map")
    for moduleName in ("outModOne", "outModTwo"):
        self.assertTrue(moduleName in reloaded.outputMap.keys(),
                        "ERROR: Output modules missing from workflow output map")

    # outModOne holds two entries (A and C); pair each entry with the
    # matching merged fileset by comparing database ids.
    for mapping in reloaded.outputMap["outModOne"]:
        if mapping["output_fileset"].id == unmergedFilesets["A"].id:
            self.assertEqual(mapping["merged_output_fileset"].id,
                             mergedFilesets["A"].id,
                             "Error: Output map incorrectly maps filesets.")
        else:
            self.assertEqual(mapping["merged_output_fileset"].id,
                             mergedFilesets["C"].id,
                             "Error: Output map incorrectly maps filesets.")
            self.assertEqual(mapping["output_fileset"].id,
                             unmergedFilesets["C"].id,
                             "Error: Output map incorrectly maps filesets.")

    self.assertEqual(reloaded.outputMap["outModTwo"][0]["merged_output_fileset"].id,
                     mergedFilesets["B"].id,
                     "Error: Output map incorrectly maps filesets.")
    self.assertEqual(reloaded.outputMap["outModTwo"][0]["output_fileset"].id,
                     unmergedFilesets["B"].id,
                     "Error: Output map incorrectly maps filesets.")
    return
def testTruncatedWFInsertion(self):
    """
    _testTruncatedWFInsertion_

    Insert a test workload into WMBS, truncate it at the MergeTask into a
    new "ResubmitTestWorkload", re-insert the truncated top-level tasks,
    and verify the resulting workflows, output filesets and subscriptions
    (merge + skim) are registered correctly.
    """
    resourceControl = ResourceControl()
    resourceControl.insertSite(siteName='site1', seName='goodse.cern.ch',
                               ceName='site1', plugin="TestPlugin")
    resourceControl.insertSite(siteName='site2', seName='goodse2.cern.ch',
                               ceName='site2', plugin="TestPlugin")

    # Install the original (un-truncated) workload into WMBS.
    testWorkload = self.createTestWMSpec()
    testTopLevelTask = getFirstTask(testWorkload)
    testWMBSHelper = WMBSHelper(testWorkload, testTopLevelTask.name(), "SomeBlock", cachepath=self.workDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testTopLevelTask, testWMBSHelper.topLevelFileset)

    # Truncate at the merge task; the remainder becomes a new workload.
    testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                          "someserver", "somedatabase")

    # create the subscription for multiple top task (MergeTask and CleanupTask for the same block)
    for task in testWorkload.getTopLevelTask():
        testResubmitWMBSHelper = WMBSHelper(testWorkload, task.name(), "SomeBlock2", cachepath=self.workDir)
        testResubmitWMBSHelper.createTopLevelFileset()
        testResubmitWMBSHelper._createSubscriptionsInWMBS(task, testResubmitWMBSHelper.topLevelFileset)

    mergeWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask")
    mergeWorkflow.load()

    self.assertEqual(mergeWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                      "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                     "Error: Wrong number of WF outputs.")

    unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
    unmergedMergeOutput.loadData()
    self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged",
                     "Error: Unmerged output fileset is wrong.")

    skimWorkflow = Workflow(name="ResubmitTestWorkload", task="/ResubmitTestWorkload/MergeTask/SkimTask")
    skimWorkflow.load()

    self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
    self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                     "WMSandbox", "WMWorkload.pkl"),
                     "Error: Wrong spec URL")
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                     "Error: Wrong number of WF outputs.")

    # Both skim outputs: merged and unmerged filesets point at the same
    # unmerged path here (the skim runs on unmerged data).
    mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
    unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
    mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
    unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

    mergedSkimOutputA.loadData()
    mergedSkimOutputB.loadData()
    unmergedSkimOutputA.loadData()
    unmergedSkimOutputB.loadData()

    self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
    self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                     "Error: Unmerged output fileset is wrong.")
    self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Merged output fileset is wrong.")
    self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                     "Error: Unmerged output fileset is wrong.")

    topLevelFileset = Fileset(name="ResubmitTestWorkload-MergeTask-SomeBlock2")
    topLevelFileset.loadData()

    mergeSubscription = Subscription(fileset=topLevelFileset, workflow=mergeWorkflow)
    mergeSubscription.loadData()

    self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                     "Error: Wrong white/black list for merge sub.")
    self.assertEqual(mergeSubscription["type"], "Merge",
                     "Error: Wrong subscription type.")
    self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                     "Error: Wrong split algo.")

    # The skim task subscribes to the merge task's unmerged output.
    skimSubscription = Subscription(fileset=unmergedMergeOutput, workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")
    return
def testReRecoDroppingRECO(self):
    """
    _testReRecoDroppingRECO_

    Verify that ReReco workflows can be created and inserted into WMBS
    correctly.  The ReReco workflow is just a DataProcessing workflow with
    skims tacked on. This tests run on unmerged RECO output
    (RECOoutput is declared transient, so the skim subscribes to the
    unmerged RECO fileset and no RECO merge workflow is expected).
    """
    skimConfig = self.injectSkimConfig()
    recoConfig = self.injectReRecoConfig()
    dataProcArguments = ReRecoWorkloadFactory.getTestArguments()
    dataProcArguments["ProcessingString"] = "ProcString"
    dataProcArguments["ConfigCacheID"] = recoConfig
    dataProcArguments.update({"SkimName1": "SomeSkim",
                              "SkimInput1": "RECOoutput",
                              "Skim1ConfigCacheID": skimConfig})
    dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
    dataProcArguments["CouchDBName"] = "rereco_t"
    # Mark RECOoutput transient: it is consumed by the skim but not merged.
    dataProcArguments["TransientOutputModules"] = ["RECOoutput"]
    dataProcArguments["EnableHarvesting"] = True
    dataProcArguments["DQMConfigCacheID"] = self.injectDQMHarvestConfig()

    factory = ReRecoWorkloadFactory()
    testWorkload = factory.factoryWorkloadConstruction("TestWorkload", dataProcArguments)

    # Sanity-check the merged LFN base built from the processing string.
    self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children. \
        SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules. \
        Merged.mergedLFNBase,
                     '/store/data/FAKE/MinimumBias/USER/SkimBFilter-ProcString-v1')

    testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath=self.testDir)
    testWMBSHelper.createTopLevelFileset()
    testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

    skimWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/DataProcessing/SomeSkim")
    skimWorkflow.load()

    # Two skim outputs plus logArchive.
    self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                     "Error: Wrong number of WF outputs.")

    # Output module name -> data tier; output map keys are module+tier.
    goldenOutputMods = {"SkimA": "RAW-RECO", "SkimB": "USER"}
    for goldenOutputMod, tier in goldenOutputMods.items():
        fset = goldenOutputMod + tier
        mergedOutput = skimWorkflow.outputMap[fset][0]["merged_output_fileset"]
        unmergedOutput = skimWorkflow.outputMap[fset][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

    logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()

    self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")
    self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

    # Check each skim-merge workflow: its merged output and logArchive.
    for goldenOutputMod, tier in goldenOutputMods.items():
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
        mergeWorkflow.load()

        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeWorkflow.outputMap["Merged%s" % tier][0]["output_fileset"]

        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()

        self.assertEqual(mergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedMergeOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged%s" % (goldenOutputMod, tier),
                         "Error: Unmerged output fileset is wrong.")

        logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                         "Error: LogArchive output fileset is wrong.")

    # The skim subscribes directly to the unmerged RECO fileset.
    topLevelFileset = Fileset(name="/TestWorkload/DataProcessing/unmerged-RECOoutputRECO")
    topLevelFileset.loadData()

    skimSubscription = Subscription(fileset=topLevelFileset, workflow=skimWorkflow)
    skimSubscription.loadData()

    self.assertEqual(skimSubscription["type"], "Skim",
                     "Error: Wrong subscription type.")
    self.assertEqual(skimSubscription["split_algo"], "FileBased",
                     "Error: Wrong split algo.")

    # Merge subscriptions over the unmerged skim outputs.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" % skimOutput)
        mergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

    # Cleanup subscriptions over the same unmerged skim outputs.
    for skimOutput, tier in goldenOutputMods.items():
        fset = skimOutput + tier
        unmerged = Fileset(name="/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" % fset)
        unmerged.loadData()
        cleanupWorkflow = Workflow(name="TestWorkload",
                                   task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmerged%s" % skimOutput)
        cleanupWorkflow.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupWorkflow)
        cleanupSubscription.loadData()

        self.assertEqual(cleanupSubscription["type"], "Cleanup",
                         "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                         "Error: Wrong split algo.")

    # LogCollect subscriptions over each merge task's logArchive fileset.
    for skimOutput in goldenOutputMods:
        skimMergeLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive" % skimOutput)
        skimMergeLogCollect.loadData()
        skimMergeLogCollectWorkflow = Workflow(name="TestWorkload",
                                               task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/SomeSkim%sMergeLogCollect" % (
                                                   skimOutput, skimOutput))
        skimMergeLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=skimMergeLogCollect, workflow=skimMergeLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

    return
def testDependentReDigi(self): """ _testDependentReDigi_ Verfiy that a dependent ReDigi workflow that keeps stages out RAW data is created and installed into WMBS correctly. """ defaultArguments = ReDigiWorkloadFactory.getTestArguments() defaultArguments["CouchURL"] = os.environ["COUCHURL"] defaultArguments["CouchDBName"] = "redigi_t" configs = injectReDigiConfigs(self.configDatabase) defaultArguments["StepOneConfigCacheID"] = configs[0] defaultArguments["StepTwoConfigCacheID"] = configs[1] defaultArguments["StepThreeConfigCacheID"] = configs[2] defaultArguments["StepOneOutputModuleName"] = "RAWDEBUGoutput" defaultArguments["StepTwoOutputModuleName"] = "RECODEBUGoutput" factory = ReDigiWorkloadFactory() testWorkload = factory.factoryWorkloadConstruction("TestWorkload", defaultArguments) testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir) testWMBSHelper.createTopLevelFileset() testWMBSHelper._createSubscriptionsInWMBS(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock") topLevelFileset.loadData() stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput") stepOneUnmergedRAWFileset.loadData() stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged") stepOneMergedRAWFileset.loadData() stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive") stepOneLogArchiveFileset.loadData() stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive") stepOneMergeLogArchiveFileset.loadData() stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput") stepTwoUnmergedDQMFileset.loadData() stepTwoUnmergedRECOFileset = Fileset(name = 
"/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput") stepTwoUnmergedRECOFileset.loadData() stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged") stepTwoMergedDQMFileset.loadData() stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged") stepTwoMergedRECOFileset.loadData() stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive") stepTwoLogArchiveFileset.loadData() stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive") stepTwoMergeDQMLogArchiveFileset.loadData() stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive") stepTwoMergeRECOLogArchiveFileset.loadData() stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule") stepThreeUnmergedAODFileset.loadData() stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged") stepThreeMergedAODFileset.loadData() stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive") stepThreeLogArchiveFileset.loadData() stepThreeMergeLogArchiveFileset = Fileset(name = 
"/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive") stepThreeMergeLogArchiveFileset.loadData() stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc") stepOneWorkflow.load() self.assertEqual(stepOneWorkflow.wfType, 'reprocessing') self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(), "Error: Step one missing output module.") self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(), "Error: Step one missing output module.") self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.") self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id, "Error: RAWDEBUG output fileset is wrong.") for outputMod in stepOneWorkflow.outputMap.keys(): self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset) stepOneSub.loadData() self.assertEqual(stepOneSub["type"], "Processing", "Error: Step one sub has wrong type.") stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput") stepOneCleanupWorkflow.load() self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0, "Error: Cleanup should have no output.") stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = 
stepOneUnmergedRAWFileset) stepOneCleanupSub.loadData() self.assertEqual(stepOneCleanupSub["type"], "Cleanup", "Error: Step one sub has wrong type.") stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/LogCollect") stepOneLogCollectWorkflow.load() self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect should have no output.") stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset) stepOneLogCollectSub.loadData() self.assertEqual(stepOneLogCollectSub["type"], "LogCollect", "Error: Step one sub has wrong type.") stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput") stepOneMergeWorkflow.load() self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(), "Error: Step one merge missing output module.") self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(), "Error: Step one merge missing output module.") self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.") self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id, "Error: RAWDEBUG merge output fileset is wrong.") for outputMod in stepOneMergeWorkflow.outputMap.keys(): self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = 
stepOneUnmergedRAWFileset) stepOneMergeSub.loadData() self.assertEqual(stepOneMergeSub["type"], "Merge", "Error: Step one sub has wrong type.") stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc") stepTwoWorkflow.load() self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(), "Error: Step two missing output module.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM output fileset is wrong.") self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id, "Error: DQM output fileset is wrong.") stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset) stepTwoSub.loadData() self.assertEqual(stepTwoSub["type"], "Processing", "Error: Step two sub has wrong type.") for outputMod in stepTwoWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = 
"/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput") stepTwoCleanupDQMWorkflow.load() self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoCleanupDQMSub.loadData() self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput") stepTwoCleanupRECOWorkflow.load() self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0, "Error: Cleanup shouldn't have any output.") stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoCleanupRECOSub.loadData() self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup", "Error: Step two sub has wrong type.") stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect") stepTwoLogCollectWorkflow.load() self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect shouldn't have any output.") stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset) stepTwoLogCollectSub.loadData() self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect", "Error: Step two sub has wrong type.") stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput") stepTwoMergeRECOWorkflow.load() self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output 
module.") self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id, "Error: RECODEBUG merge output fileset is wrong.") stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset) stepTwoMergeRECOSub.loadData() self.assertEqual(stepTwoMergeRECOSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput") stepTwoMergeDQMWorkflow.load() self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(), "Error: Step two merge missing output module.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id, "Error: 
logArchive fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id, "Error: DQM merge output fileset is wrong.") stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset) stepTwoMergeDQMSub.loadData() self.assertEqual(stepTwoMergeDQMSub["type"], "Merge", "Error: Step two sub has wrong type.") for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys(): self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc") stepThreeWorkflow.load() self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(), "Error: Step three missing output module.") self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(), "Error: Step three missing output module.") self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.") self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id, "Error: RECODEBUG output fileset is wrong.") stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset) stepThreeSub.loadData() 
self.assertEqual(stepThreeSub["type"], "Processing", "Error: Step three sub has wrong type.") for outputMod in stepThreeWorkflow.outputMap.keys(): self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule") stepThreeCleanupWorkflow.load() self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0, "Error: Cleanup should have no output.") stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset) stepThreeCleanupSub.loadData() self.assertEqual(stepThreeCleanupSub["type"], "Cleanup", "Error: Step three sub has wrong type.") stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect") stepThreeLogCollectWorkflow.load() self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0, "Error: LogCollect should have no output.") stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset) stepThreeLogCollectSub.loadData() self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect", "Error: Step three sub has wrong type.") stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload", task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule") stepThreeMergeWorkflow.load() self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(), "Error: Step three merge missing output module.") self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(), "Error: Step 
three merge missing output module.") self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id, "Error: logArchive fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id, "Error: AOD merge output fileset is wrong.") stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset) stepThreeMergeSub.loadData() self.assertEqual(stepThreeMergeSub["type"], "Merge", "Error: Step three sub has wrong type.") for outputMod in stepThreeMergeWorkflow.outputMap.keys(): self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1, "Error: more than one destination for output mod.") return
def _generationTaskTest(self):
    """
    _generationTaskTest_

    Verify that a Generation workload was installed into WMBS correctly:
    the Generation workflow's output map (one data output module plus
    logArchive), the per-output merge workflows and their filesets, and
    the Production/Merge/Cleanup/LogCollect subscriptions with their
    splitting algorithms.
    """
    # retrieve task from the installed workflow
    genTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation")
    genTask.load()
    # one real output module + logArchive
    self.assertEqual(len(genTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
    # output modules
    goldenOutputMods = ["OutputA"]
    for o in goldenOutputMods:
        mergedOutput = genTask.outputMap[o][0]["merged_output_fileset"]
        unmergedOutput = genTask.outputMap[o][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        # merged output lands in the merge task's merged-Merged fileset
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Merged output fileset is wrong: %s" % mergedOutput.name,
        )
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Generation/unmerged-%s" % o,
            "Error: Unmerged output fileset is wrong.",
        )
    # logArchive has no merge task, so merged and unmerged coincide
    logArchOutput = genTask.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = genTask.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Generation/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.",
    )
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Generation/unmerged-logArchive",
        "Error: LogArchive output fileset is wrong.",
    )
    # check the merge workflow for every output module
    for o in goldenOutputMods:
        mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # for a merge task both point at the merged fileset
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Merged output fileset is wrong.",
        )
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-Merged" % o,
            "Error: Unmerged output fileset is wrong.",
        )
        logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
            "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name,
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o,
            "Error: LogArchive output fileset is wrong.",
        )
    # top-level Production subscription on the injected block fileset
    topLevelFileset = Fileset(name="TestWorkload-Generation-SomeBlock")
    topLevelFileset.loadData()
    prodSubscription = Subscription(fileset=topLevelFileset, workflow=genTask)
    prodSubscription.loadData()
    self.assertEqual(prodSubscription["type"], "Production", "Error: Wrong subscription type.")
    self.assertEqual(prodSubscription["split_algo"], "EventBased", "Error: Wrong split algo.")
    # Merge subscriptions feed off the unmerged output filesets
    for o in goldenOutputMods:
        unmergedOutput = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
        unmergedOutput.loadData()
        mergeTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationMerge%s" % o)
        mergeTask.load()
        mergeSubscription = Subscription(fileset=unmergedOutput, workflow=mergeTask)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
        self.assertEqual(
            mergeSubscription["split_algo"],
            "ParentlessMergeBySize",
            "Error: Wrong split algo: %s" % mergeSubscription["split_algo"],
        )
    # Cleanup subscriptions also feed off the unmerged output filesets
    for o in goldenOutputMods:
        unmerged = Fileset(name="/TestWorkload/Generation/unmerged-%s" % o)
        unmerged.loadData()
        cleanupTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenerationCleanupUnmerged%s" % o)
        cleanupTask.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.")
    # LogCollect subscription for the generation task itself
    genLogCollect = Fileset(name="/TestWorkload/Generation/unmerged-logArchive")
    genLogCollect.loadData()
    genLogCollectTask = Workflow(name="TestWorkload", task="/TestWorkload/Generation/GenLogCollect")
    genLogCollectTask.load()
    logCollectSub = Subscription(fileset=genLogCollect, workflow=genLogCollectTask)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
    # ... and one LogCollect subscription per merge task
    for o in goldenOutputMods:
        mergeLogCollect = Fileset(name="/TestWorkload/Generation/GenerationMerge%s/merged-logArchive" % o)
        mergeLogCollect.loadData()
        mergeLogCollectTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMerge%s/Generation%sMergeLogCollect" % (o, o),
        )
        mergeLogCollectTask.load()
        logCollectSub = Subscription(fileset=mergeLogCollect, workflow=mergeLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
def _stepTwoTaskTest(self):
    """
    _stepTwoTaskTest_

    Verify the WMBS installation of the StepTwo task (nested under the
    Generation -> StepOne merge chain): its output map, the merge
    workflows for OutputC/OutputD, and the Merge/Cleanup/LogCollect
    subscriptions attached to its filesets.
    """
    alcaRecoTask = Workflow(
        name="TestWorkload",
        task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo",
    )
    alcaRecoTask.load()
    # two real output modules + logArchive
    self.assertEqual(len(alcaRecoTask.outputMap.keys()), 3, "Error: Wrong number of WF outputs.")
    # output modules
    goldenOutputMods = ["OutputC", "OutputD"]
    for o in goldenOutputMods:
        mergedOutput = alcaRecoTask.outputMap[o][0]["merged_output_fileset"]
        unmergedOutput = alcaRecoTask.outputMap[o][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(
            mergedOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Merged output fileset is wrong: %s" % mergedOutput.name),
        )
        self.assertEqual(
            unmergedOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o,
            ("Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name),
        )
    # logArchive has no merge task, so both entries are the unmerged fileset
    logArchOutput = alcaRecoTask.outputMap["logArchive"][0]["merged_output_fileset"]
    unmergedLogArchOutput = alcaRecoTask.outputMap["logArchive"][0]["output_fileset"]
    logArchOutput.loadData()
    unmergedLogArchOutput.loadData()
    self.assertEqual(
        logArchOutput.name,
        "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
        ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
    )
    self.assertEqual(
        unmergedLogArchOutput.name,
        "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-logArchive",
        ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
    )
    # check the merge workflow for each output module
    for o in goldenOutputMods:
        mergeTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s" % o,
        )
        mergeTask.load()
        self.assertEqual(len(mergeTask.outputMap.keys()), 2, "Error: Wrong number of WF outputs.")
        mergedMergeOutput = mergeTask.outputMap["Merged"][0]["merged_output_fileset"]
        unmergedMergeOutput = mergeTask.outputMap["Merged"][0]["output_fileset"]
        mergedMergeOutput.loadData()
        unmergedMergeOutput.loadData()
        # both merge-task entries point at the merged fileset
        self.assertEqual(
            mergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Merged output fileset is wrong: %s" % mergedMergeOutput.name),
        )
        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-Merged" % o,
            ("Error: Unmerged output fileset is wrong: %s" % unmergedMergeOutput.name),
        )
        logArchOutput = mergeTask.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = mergeTask.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o,
            ("Error: LogArchive output fileset is wrong: %s" % logArchOutput.name),
        )
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o,
            ("Error: LogArchive output fileset is wrong: %s" % unmergedLogArchOutput.name),
        )
    # Merge subscriptions on the unmerged output filesets
    for o in goldenOutputMods:
        unmerged = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o
        )
        unmerged.loadData()
        mergeTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s" % o,
        )
        mergeTask.load()
        mergeSubscription = Subscription(fileset=unmerged, workflow=mergeTask)
        mergeSubscription.loadData()
        self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize", "Error: Wrong split algo.")
    # Cleanup subscriptions on the same unmerged filesets
    for o in goldenOutputMods:
        unmerged = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/unmerged-%s" % o
        )
        unmerged.loadData()
        cleanupTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoCleanupUnmerged%s" % o,
        )
        cleanupTask.load()
        cleanupSubscription = Subscription(fileset=unmerged, workflow=cleanupTask)
        cleanupSubscription.loadData()
        self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.")
        self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.")
    # LogCollect subscriptions on each merge task's logArchive fileset
    for o in goldenOutputMods:
        alcaRecoMergeLogCollect = Fileset(
            name="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/merged-logArchive" % o
        )
        alcaRecoMergeLogCollect.loadData()
        alcaRecoMergeLogCollectTask = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Generation/GenerationMergeOutputA/StepOne/StepOneMergeOutputB/StepTwo/StepTwoMerge%s/StepTwo%sMergeLogCollect" % (o, o),
        )
        alcaRecoMergeLogCollectTask.load()
        logCollectSub = Subscription(fileset=alcaRecoMergeLogCollect, workflow=alcaRecoMergeLogCollectTask)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun", "Error: Wrong split algo.")
def _checkTask(self, task, taskConf, centralConf):
    """
    _checkTask_

    Verify the correctness of the task.

    Checks, in order: the input step wiring for child tasks, the pileup
    configuration, the WMBS workflow output map (merged/unmerged
    filesets for every output module), the merge workflow filesets for
    kept outputs, the output module dataset settings, and finally the
    task's input subscription type and splitting algorithm.

    :param task: WMTask to verify (already installed into WMBS)
    :param taskConf: per-task configuration dict from the request
    :param centralConf: top-level request configuration dict
    """
    if "InputTask" in taskConf:
        # Child tasks must point their input step at the parent's cmsRun1 step
        inpTaskPath = task.getPathName()
        inpTaskPath = inpTaskPath.replace(task.name(), "")
        inpTaskPath += "cmsRun1"
        self.assertEqual(task.data.input.inputStep, inpTaskPath, "Input step is wrong in the spec")
        self.assertTrue(taskConf["InputTask"] in inpTaskPath, "Input task is not in the path name for child task")
    if "MCPileup" in taskConf or "DataPileup" in taskConf:
        mcDataset = taskConf.get('MCPileup', None)
        dataDataset = taskConf.get('DataPileup', None)
        if mcDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.mc.dataset, [mcDataset])
        if dataDataset:
            self.assertEqual(task.data.steps.cmsRun1.pileup.data.dataset, [dataDataset])

    workflow = Workflow(name=self.workload.name(), task=task.getPathName())
    workflow.load()
    outputMods = outputModuleList(task)
    self.assertEqual(len(workflow.outputMap.keys()), len(outputMods),
                     "Error: Wrong number of WF outputs")

    for outputModule in outputMods:
        filesets = workflow.outputMap[outputModule][0]
        merged = filesets['merged_output_fileset']
        unmerged = filesets['output_fileset']
        merged.loadData()
        unmerged.loadData()

        # Output goes through a merge task unless it is the log archive,
        # the task drops its output, or the module is transient/ignored.
        # (Single flag replaces the original duplicated condition pair.)
        keepOutput = (outputModule != "logArchive"
                      and taskConf.get("KeepOutput", True)
                      and outputModule not in taskConf.get("TransientOutputModules", [])
                      and outputModule not in centralConf.get("IgnoredOutputModules", []))

        unmergedset = task.getPathName() + "/unmerged-" + outputModule
        if keepOutput:
            mergedset = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-Merged"
        else:
            # No merge task: "merged" output is the unmerged fileset itself
            mergedset = unmergedset
        self.assertEqual(mergedset, merged.name, "Merged fileset name is wrong")
        self.assertEqual(unmergedset, unmerged.name, "Unmerged fileset name is wrong")

        if keepOutput:
            # Verify the associated merge workflow and its filesets
            mergeTask = task.getPathName() + "/" + task.name() + "Merge" + outputModule
            mergeWorkflow = Workflow(name=self.workload.name(), task=mergeTask)
            mergeWorkflow.load()
            self.assertTrue("Merged" in mergeWorkflow.outputMap,
                            "Merge workflow does not contain a Merged output key")
            mergedOutputMod = mergeWorkflow.outputMap['Merged'][0]
            mergedFileset = mergedOutputMod['merged_output_fileset']
            unmergedFileset = mergedOutputMod['output_fileset']
            mergedFileset.loadData()
            unmergedFileset.loadData()
            # For a merge task both entries point at the merged fileset
            self.assertEqual(mergedFileset.name, mergedset, "Merged fileset name in merge task is wrong")
            self.assertEqual(unmergedFileset.name, mergedset, "Unmerged fileset name in merge task is wrong")
            mrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['merged_output_fileset']
            umrgLogArch = mergeWorkflow.outputMap['logArchive'][0]['output_fileset']
            mrgLogArch.loadData()
            umrgLogArch.loadData()
            archName = task.getPathName() + "/" + task.name() + "Merge" + outputModule + "/merged-logArchive"
            self.assertEqual(mrgLogArch.name, archName, "LogArchive merged fileset name is wrong in merge task")
            self.assertEqual(umrgLogArch.name, archName, "LogArchive unmerged fileset name is wrong in merge task")

        if outputModule != "logArchive":
            # Verify dataset settings on the output module itself
            taskOutputMods = task.getOutputModulesForStep(stepName="cmsRun1")
            currentModule = getattr(taskOutputMods, outputModule)
            if "PrimaryDataset" in taskConf:
                self.assertEqual(currentModule.primaryDataset, taskConf["PrimaryDataset"],
                                 "Wrong primary dataset")
            # FIX: was a single comma-joined string
            # ["AcquisitionEra, ProcessingString, ProcessingVersion"], so no
            # part could ever match a taskConf key and the checks were dead.
            processedDatasetParts = ["AcquisitionEra", "ProcessingString", "ProcessingVersion"]
            allParts = True
            for part in processedDatasetParts:
                if part in taskConf:
                    # FIX: 'asserTrue' typo -> assertTrue; compare the
                    # configured VALUE (not the key name) against the
                    # processed dataset. str() because ProcessingVersion
                    # may be an int.
                    self.assertTrue(str(taskConf[part]) in currentModule.processedDataset,
                                    "Wrong processed dataset for module")
                else:
                    allParts = False
            if allParts:
                # FIX: the original passed the expected string and the error
                # message as the two compared values (missing actual value)
                # and carried a leftover debug 'print "yes"'.
                self.assertEqual(currentModule.processedDataset,
                                 "%s-%s-v%s" % (taskConf["AcquisitionEra"],
                                                taskConf["ProcessingString"],
                                                taskConf["ProcessingVersion"]),
                                 "Wrong processed dataset for module")

    # Test subscriptions
    if "InputTask" not in taskConf:
        # Top-level task: subscription sits on the injected block fileset
        inputFileset = "%s-%s-SomeBlock" % (self.workload.name(), task.name())
    elif "Merge" in task.getPathName().split("/")[-2]:
        # Task hangs off a merge task: input is the merged fileset
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "merged-Merged"
    else:
        # Task hangs off a processing task: input is an unmerged fileset
        inpTaskPath = task.getPathName().replace(task.name(), "")
        inputFileset = inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"]
    taskFileset = Fileset(name=inputFileset)
    taskFileset.loadData()
    taskSubscription = Subscription(fileset=taskFileset, workflow=workflow)
    taskSubscription.loadData()
    if "InputTask" not in taskConf and "InputDataset" not in taskConf:
        # Production type
        self.assertEqual(taskSubscription["type"], "Production",
                         "Error: Wrong subscription type for processing task")
        self.assertEqual(taskSubscription["split_algo"], taskConf["SplittingAlgorithm"],
                         "Error: Wrong split algo for generation task")
    else:
        # Processing type
        self.assertEqual(taskSubscription["type"], "Processing", "Wrong subscription type for task")
        if taskSubscription["split_algo"] != "WMBSMergeBySize":
            self.assertEqual(taskSubscription["split_algo"], taskConf['SplittingAlgorithm'],
                             "Splitting algo mismatch")
        else:
            # WMBSMergeBySize is only valid when feeding off an unmerged fileset
            self.assertEqual(taskFileset.name,
                             inpTaskPath + "unmerged-%s" % taskConf["InputFromOutputModule"],
                             "Subscription uses WMBSMergeBySize on a merge fileset")
    return
def killWorkflows(self, workflows):
    """
    _killWorkflows_

    Delete all the information in couch and WMBS about the given
    workflow, go through all subscriptions and delete one by
    one. The input is a dictionary with workflow names as keys,
    fully loaded WMWorkloads and subscriptions lists as values

    Any failure for a single workflow is caught, logged and alerted,
    then the loop continues with the next workflow (best-effort
    cleanup rather than fail-fast).
    """
    for workflow in workflows:
        logging.info("Deleting workflow %s" % workflow)
        try:
            #Get the task-workflow ids, sort them by ID,
            #higher ID first so we kill
            #the leaves of the tree first, root last
            workflowsIDs = workflows[workflow]["workflows"].keys()
            workflowsIDs.sort(reverse = True)

            #Now go through all tasks and load the WMBS workflow objects
            wmbsWorkflows = []
            for wfID in workflowsIDs:
                wmbsWorkflow = Workflow(id = wfID)
                wmbsWorkflow.load()
                wmbsWorkflows.append(wmbsWorkflow)

            #Time to shoot one by one
            for wmbsWorkflow in wmbsWorkflows:
                # publish DBS/PhEDEx info to couch before deleting, if configured
                if self.uploadPublishInfo:
                    self.createAndUploadPublish(wmbsWorkflow)

                #Load all the associated subscriptions and shoot them one by one
                subIDs = workflows[workflow]["workflows"][wmbsWorkflow.id]
                for subID in subIDs:
                    subscription = Subscription(id = subID)
                    # pre-assign the workflow so load() doesn't have to fetch it
                    subscription['workflow'] = wmbsWorkflow
                    subscription.load()
                    subscription.deleteEverything()

                #Check that the workflow is gone
                if wmbsWorkflow.exists():
                    #Something went bad, this workflow
                    #should be gone by now
                    msg = "Workflow %s, Task %s was not deleted completely" % (wmbsWorkflow.name,
                                                                               wmbsWorkflow.task)
                    raise TaskArchiverPollerException(msg)

                #Now delete directories
                _, taskDir = getMasterName(startDir = self.jobCacheDir,
                                           workflow = wmbsWorkflow)
                logging.info("About to delete work directory %s" % taskDir)
                if os.path.exists(taskDir):
                    if os.path.isdir(taskDir):
                        shutil.rmtree(taskDir)
                    else:
                        # What we think of as a working directory is not a directory
                        # This should never happen and there is no way we can recover
                        # from this here. Bail out now and have someone look at things.
                        msg = "Work directory is not a directory, this should never happen: %s" % taskDir
                        raise TaskArchiverPollerException(msg)
                else:
                    # already gone: not fatal, just note it
                    msg = "Attempted to delete work directory but it was already gone: %s" % taskDir
                    logging.debug(msg)

            spec = workflows[workflow]["spec"]
            topTask = spec.getTopLevelTask()[0]

            # Now take care of the sandbox
            sandbox = getattr(topTask.data.input, 'sandbox', None)
            if sandbox:
                sandboxDir = os.path.dirname(sandbox)
                if os.path.isdir(sandboxDir):
                    shutil.rmtree(sandboxDir)
                    logging.debug("Sandbox dir deleted")
                else:
                    logging.error("Attempted to delete sandbox dir but it was already gone: %s" % sandboxDir)

        except Exception, ex:
            # best-effort: log + alert, then continue with the next workflow
            msg = "Critical error while deleting workflow %s\n" % workflow
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            self.sendAlert(2, msg = msg)
def testPrivateMC(self):
    """
    Build a PrivateMC workload, install it into WMBS and verify the
    resulting workflow output map, filesets and subscriptions.
    """
    # Assemble the request arguments for the PrivateMC factory
    args = getTestArguments()
    args["CouchURL"] = os.environ["COUCHURL"]
    args["CouchDBName"] = "privatemc_t"
    args["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
    args["ProcessingVersion"] = 1

    workload = PrivateMCWorkloadFactory()("TestWorkload", args)
    workload.setSpecUrl("somespec")
    workload.setOwnerDetails("*****@*****.**", "DMWM")

    # Install the top-level task into WMBS
    helper = WMBSHelper(workload, "PrivateMC", "SomeBlock", cachepath = self.testDir)
    helper.createTopLevelFileset()
    helper._createSubscriptionsInWMBS(helper.topLevelTask, helper.topLevelFileset)

    mainWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC")
    mainWorkflow.load()
    outputCount = len(mainWorkflow.outputMap.keys())
    self.assertEqual(outputCount, 3,
                     "Error: Wrong number of WF outputs: %s" % outputCount)

    # PrivateMC (like Analysis) has no merge task, so the "merged" and
    # unmerged logArchive entries are the same fileset
    for mapKey in ("merged_output_fileset", "output_fileset"):
        archFileset = mainWorkflow.outputMap["logArchive"][0][mapKey]
        archFileset.loadData()
        self.assertEqual(archFileset.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

    # Both data output modules stay unmerged for the same reason
    for modName in ("OutputA", "OutputB"):
        mergedFs = mainWorkflow.outputMap[modName][0]["merged_output_fileset"]
        unmergedFs = mainWorkflow.outputMap[modName][0]["output_fileset"]
        mergedFs.loadData()
        unmergedFs.loadData()
        self.assertEqual(mergedFs.name,
                         "/TestWorkload/PrivateMC/unmerged-%s" % modName,
                         "Error: Merged output fileset is wrong: %s" % mergedFs.name)
        self.assertEqual(unmergedFs.name,
                         "/TestWorkload/PrivateMC/unmerged-%s" % modName,
                         "Error: Unmerged output fileset is wrong.")

    # The top-level subscription sits on the injected block fileset
    blockFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
    blockFileset.loadData()
    mainSubscription = Subscription(fileset = blockFileset, workflow = mainWorkflow)
    mainSubscription.loadData()
    self.assertEqual(mainSubscription["type"], "PrivateMC", "Error: Wrong subscription type.")
    self.assertEqual(mainSubscription["split_algo"], "EventBased", "Error: Wrong split algo.")

    # LogCollect subscription feeds off the unmerged logArchive fileset
    archInput = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
    archInput.loadData()
    logCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/PrivateMC/LogCollect")
    logCollectWorkflow.load()
    logCollectSub = Subscription(fileset = archInput, workflow = logCollectWorkflow)
    logCollectSub.loadData()
    self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.")
    self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.")
def testPromptReco(self): """ _testPromptReco_ Create a Prompt Reconstruction workflow and verify it installs into WMBS correctly. """ testArguments = getTestArguments() testWorkload = promptrecoWorkload("TestWorkload", testArguments) testWorkload.setSpecUrl("somespec") testWorkload.setOwnerDetails("*****@*****.**", "T0") testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock") testWMBSHelper.createTopLevelFileset() testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset) recoWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco") recoWorkflow.load() self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1, "Error: Wrong number of WF outputs in the Reco WF.") goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() if goldenOutputMod != "write_ALCARECO": self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") alcaSkimWorkflow = Workflow(name = "TestWorkload", task = 
"/TestWorkload/Reco/AlcaSkim") alcaSkimWorkflow.load() self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1, "Error: Wrong number of WF outputs in the AlcaSkim WF.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name) logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") dqmWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged") dqmWorkflow.load() logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") 
self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs %d." 
% len(mergeWorkflow.outputMap.keys())) mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock") topLevelFileset.loadData() recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow) recoSubscription.loadData() self.assertEqual(recoSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(recoSubscription["split_algo"], "EventBased", "Error: Wrong split algorithm. 
%s" % recoSubscription["split_algo"]) alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO") alcaRecoFileset.loadData() alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow) alcaSkimSubscription.loadData() self.assertEqual(alcaSkimSubscription["type"], "Processing", "Error: Wrong subscription type.") self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"]) mergedDQMFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged") mergedDQMFileset.loadData() dqmSubscription = Subscription(fileset = mergedDQMFileset, workflow = dqmWorkflow) dqmSubscription.loadData() self.assertEqual(dqmSubscription["type"], "Harvesting", "Error: Wrong subscription type.") self.assertEqual(dqmSubscription["split_algo"], "Harvest", "Error: Wrong split algo.") unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"] for unmergedOutput in unmergedOutputs: unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput) unmergedDataTier.loadData() dataTierMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput) dataTierMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. 
%s" % mergeSubscription["split_algo"]) unmergedOutputs = [] for alcaProd in testArguments["AlcaSkims"]: unmergedOutputs.append("ALCARECOStream%s" % alcaProd) for unmergedOutput in unmergedOutputs: unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput) unmergedAlcaSkim.loadData() alcaSkimMergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput) alcaSkimMergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize", "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"]) goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"] for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod) unmergedFileset.loadData() cleanupWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod) cleanupWorkflow.load() cleanupSubscription = 
Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong subscription type.") recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive") recoLogCollect.loadData() recoLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/LogCollect") recoLogCollectWorkflow.load() logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive") alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect") alcaSkimLogCollectWorkflow.load() logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"] for goldenOutputMod in goldenOutputMods: recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod) recoMergeLogCollect.loadData() recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) recoMergeLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow) logCollectSubscription.loadData() 
self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") goldenOutputMods = [] for alcaProd in testArguments["AlcaSkims"]: goldenOutputMods.append("ALCARECOStream%s" % alcaProd) for goldenOutputMod in goldenOutputMods: alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod) alcaSkimLogCollect.loadData() alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod)) alcaSkimLogCollectWorkflow.load() logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow) logCollectSubscription.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algorithm.") dqmHarvestLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive") dqmHarvestLogCollect.loadData() dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect") dqmHarvestLogCollectWorkflow.load() logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") return
def _commonMonteCarloTest(self): """ Retrieve the workload from WMBS and test all its properties. """ prodWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production") prodWorkflow.load() self.assertEqual(len(prodWorkflow.outputMap.keys()), 3, "Error: Wrong number of WF outputs.") goldenOutputMods = ["OutputA", "OutputB"] for goldenOutputMod in goldenOutputMods: mergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"] unmergedOutput = prodWorkflow.outputMap[goldenOutputMod][0]["output_fileset"] mergedOutput.loadData() unmergedOutput.loadData() self.assertEqual(mergedOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod, "Error: Merged output fileset is wrong: %s" % mergedOutput.name) self.assertEqual(unmergedOutput.name, "/TestWorkload/Production/unmerged-%s" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = prodWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = prodWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Production/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/unmerged-logArchive", "Error: LogArchive output fileset is wrong.") for goldenOutputMod in goldenOutputMods: mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production/ProductionMerge%s" % goldenOutputMod) mergeWorkflow.load() self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2, "Error: Wrong number of WF outputs.") mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"] unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"] mergedMergeOutput.loadData() unmergedMergeOutput.loadData() self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % 
goldenOutputMod, "Error: Merged output fileset is wrong.") self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-Merged" % goldenOutputMod, "Error: Unmerged output fileset is wrong.") logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"] unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"] logArchOutput.loadData() unmergedLogArchOutput.loadData() self.assertEqual(logArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name) self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % goldenOutputMod, "Error: LogArchive output fileset is wrong.") topLevelFileset = Fileset(name = "TestWorkload-Production-SomeBlock") topLevelFileset.loadData() prodSubscription = Subscription(fileset = topLevelFileset, workflow = prodWorkflow) prodSubscription.loadData() self.assertEqual(prodSubscription["type"], "Production", "Error: Wrong subscription type.") self.assertEqual(prodSubscription["split_algo"], "EventBased", "Error: Wrong split algo.") for outputName in ["OutputA", "OutputB"]: unmergedOutput = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName) unmergedOutput.loadData() mergeWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production/ProductionMerge%s" % outputName) mergeWorkflow.load() mergeSubscription = Subscription(fileset = unmergedOutput, workflow = mergeWorkflow) mergeSubscription.loadData() self.assertEqual(mergeSubscription["type"], "Merge", "Error: Wrong subscription type.") self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize", "Error: Wrong split algo: %s" % mergeSubscription["split_algo"]) for outputName in ["OutputA", "OutputB"]: unmerged = Fileset(name = "/TestWorkload/Production/unmerged-%s" % outputName) unmerged.loadData() cleanupWorkflow = 
Workflow(name = "TestWorkload", task = "/TestWorkload/Production/ProductionCleanupUnmerged%s" % outputName) cleanupWorkflow.load() cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow) cleanupSubscription.loadData() self.assertEqual(cleanupSubscription["type"], "Cleanup", "Error: Wrong subscription type.") self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased", "Error: Wrong split algo.") procLogCollect = Fileset(name = "/TestWorkload/Production/unmerged-logArchive") procLogCollect.loadData() procLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production/LogCollect") procLogCollectWorkflow.load() logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.") for outputName in ["OutputA", "OutputB"]: mergeLogCollect = Fileset(name = "/TestWorkload/Production/ProductionMerge%s/merged-logArchive" % outputName) mergeLogCollect.loadData() mergeLogCollectWorkflow = Workflow(name = "TestWorkload", task = "/TestWorkload/Production/ProductionMerge%s/Production%sMergeLogCollect" % (outputName, outputName)) mergeLogCollectWorkflow.load() logCollectSub = Subscription(fileset = mergeLogCollect, workflow = mergeLogCollectWorkflow) logCollectSub.loadData() self.assertEqual(logCollectSub["type"], "LogCollect", "Error: Wrong subscription type.") self.assertEqual(logCollectSub["split_algo"], "MinFileBased", "Error: Wrong split algo.")
def testCreateLoadWithUserAtt(self): """ _testLoad_ Create a workflow and then try to load it from the database using the following load methods: Workflow.LoadFromNameAndTask Workflow.LoadFromID Workflow.LoadFromSpecOwner Test if the Workflow is created correctly. """ testWorkflowA = Workflow(spec="spec.xml", owner="Simon", owner_vogroup='integration', owner_vorole='priority', name="wf001", task='Test', wfType="ReReco") testWorkflowA.create() testWorkflowB = Workflow(name="wf001", task='Test') testWorkflowB.load() self.assertTrue((testWorkflowA.id == testWorkflowB.id) and (testWorkflowA.name == testWorkflowB.name) and (testWorkflowA.spec == testWorkflowB.spec) and (testWorkflowA.task == testWorkflowB.task) and (testWorkflowA.wfType == testWorkflowB.wfType) and (testWorkflowA.owner == testWorkflowB.owner), "ERROR: Workflow.LoadFromNameAndTask Failed") testWorkflowC = Workflow(id=testWorkflowA.id) testWorkflowC.load() self.assertTrue((testWorkflowA.id == testWorkflowC.id) and (testWorkflowA.name == testWorkflowC.name) and (testWorkflowA.spec == testWorkflowC.spec) and (testWorkflowA.task == testWorkflowC.task) and (testWorkflowA.wfType == testWorkflowC.wfType) and (testWorkflowA.owner == testWorkflowC.owner), "ERROR: Workflow.LoadFromID Failed") testWorkflowD = Workflow(spec="spec.xml", owner="Simon", task='Test') testWorkflowD.load() self.assertTrue((testWorkflowA.id == testWorkflowD.id) and (testWorkflowA.name == testWorkflowD.name) and (testWorkflowA.spec == testWorkflowD.spec) and (testWorkflowA.task == testWorkflowD.task) and (testWorkflowA.wfType == testWorkflowD.wfType) and (testWorkflowA.owner == testWorkflowD.owner), "ERROR: Workflow.LoadFromSpecOwner Failed") testWorkflowE = Workflow(spec="spec_1.xml", owner="Simon", owner_vogroup='t1access', owner_vorole='t1access', name="wf002", task='Test_1', wfType="ReReco") testWorkflowE.create() self.assertTrue((testWorkflowE.id != testWorkflowA.id) and (testWorkflowE.name != testWorkflowA.name) and 
(testWorkflowE.spec != testWorkflowA.spec) and (testWorkflowE.vogroup != testWorkflowA.vogroup) and (testWorkflowE.vorole != testWorkflowA.vorole) and (testWorkflowE.task != testWorkflowA.task) and (testWorkflowE.wfType == testWorkflowA.wfType) and (testWorkflowE.owner == testWorkflowA.owner), "ERROR: Workflow.LoadFromSpecOwner Failed") testWorkflowA.delete() return
    def pollSubscriptions(self):
        """
        _pollSubscriptions_

        Poller for looking in all active subscriptions for jobs that
        need to be made.

        For every subscription returned by the subscriptionList DAO this
        loads the subscription, its workflow and its WMWorkload spec, then
        drives the job splitter in a loop, creating job groups (via
        creatorProcess), recording each job's cache directory in bulk and
        advancing the new groups through changeState.  Each splitter pass is
        wrapped in its own database transaction so one pass rolls back as a
        unit.  Subscriptions that cannot be loaded or that have no
        task/sandbox are skipped with an alert, never failed.
        """
        logging.info("Beginning JobCreator.pollSubscriptions() cycle.")
        myThread = threading.currentThread()

        #First, get list of Subscriptions
        subscriptions = self.subscriptionList.execute()

        # Okay, now we have a list of subscriptions
        for subscriptionID in subscriptions:
            wmbsSubscription = Subscription(id = subscriptionID)
            try:
                wmbsSubscription.load()
            except IndexError:
                # This happens when the subscription no longer exists
                # i.e., someone executed a kill() function on the database
                # while the JobCreator was in cycle
                # Ignore this subscription
                msg = "JobCreator cannot load subscription %i" % subscriptionID
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            workflow = Workflow(id = wmbsSubscription["workflow"].id)
            workflow.load()
            wmbsSubscription['workflow'] = workflow
            wmWorkload = retrieveWMSpec(workflow = workflow)

            if not workflow.task or not wmWorkload:
                # Then we have a problem
                # We NEED a sandbox
                # Abort this subscription!
                # But do NOT fail
                # We have no way of marking a subscription as bad per se
                # We'll have to just keep skipping it
                msg = "Have no task for workflow %i\n" % (workflow.id)
                msg += "Aborting Subscription %i" % (subscriptionID)
                logging.error(msg)
                self.sendAlert(1, msg = msg)
                continue

            logging.debug("Have loaded subscription %i with workflow %i\n" % (subscriptionID, workflow.id))

            # Set task object
            wmTask = wmWorkload.getTaskByPath(workflow.task)

            # Get generators
            # If you fail to load the generators, pass on the job
            try:
                if hasattr(wmTask.data, 'generators'):
                    manager = GeneratorManager(wmTask)
                    seederList = manager.getGeneratorList()
                else:
                    seederList = []
            except Exception, ex:
                msg = "Had failure loading generators for subscription %i\n" % (subscriptionID)
                msg += "Exception: %s\n" % str(ex)
                msg += "Passing over this error. It will reoccur next interation!\n"
                msg += "Please check or remove this subscription!\n"
                logging.error(msg)
                self.sendAlert(6, msg = msg)
                continue

            logging.debug("Going to call wmbsJobFactory for sub %i with limit %i" % (subscriptionID, self.limit))

            splitParams = retrieveJobSplitParams(wmWorkload, workflow.task)
            logging.debug("Split Params: %s" % splitParams)

            # My hope is that the job factory is smart enough only to split un-split jobs
            splitterFactory = SplitterFactory(splitParams.get('algo_package', "WMCore.JobSplitting"))
            wmbsJobFactory = splitterFactory(package = "WMCore.WMBS",
                                             subscription = wmbsSubscription,
                                             generators=seederList,
                                             limit = self.limit)

            # Turn on the jobFactory
            wmbsJobFactory.open()

            # Create a function to hold it
            jobSplittingFunction = runSplitter(jobFactory = wmbsJobFactory,
                                               splitParams = splitParams)

            # Now we get to find out how many jobs there are.
            # jobNumber seeds the per-workflow job counter; initial_lfn_counter
            # offsets it when a split is resumed.
            jobNumber = self.countJobs.execute(workflow = workflow.id,
                                               conn = myThread.transaction.conn,
                                               transaction = True)
            jobNumber += splitParams.get('initial_lfn_counter', 0)
            logging.debug("Have %i jobs for workflow %s already in database." % (jobNumber, workflow.name))

            continueSubscription = True
            while continueSubscription:
                # This loop runs over the jobFactory,
                # using yield statements and a pre-existing proxy to
                # generate and process new jobs

                # First we need the jobs.
                # Each splitter pass runs inside its own transaction so that a
                # failure only rolls back this pass.
                myThread.transaction.begin()
                try:
                    wmbsJobGroups = jobSplittingFunction.next()
                    logging.info("Retrieved %i jobGroups from jobSplitter" % (len(wmbsJobGroups)))
                except StopIteration:
                    # If you receive a stopIteration, we're done
                    logging.info("Completed iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # If we have no jobGroups, we're done
                if len(wmbsJobGroups) == 0:
                    logging.info("Found end in iteration over subscription %i" % (subscriptionID))
                    continueSubscription = False
                    myThread.transaction.commit()
                    break

                # Assemble a dict of all the info
                processDict = {'workflow': workflow,
                               'wmWorkload': wmWorkload, 'wmTaskName': wmTask.getPathName(),
                               'jobNumber': jobNumber, 'sandbox': wmTask.data.input.sandbox,
                               'owner': wmWorkload.getOwner().get('name', None),
                               'ownerDN': wmWorkload.getOwner().get('dn', None),
                               'ownerGroup': wmWorkload.getOwner().get('vogroup', ''),
                               'ownerRole': wmWorkload.getOwner().get('vorole', '')}

                # NOTE(review): a fresh, minimal Subscription object is attached
                # to each job group — presumably to avoid carrying the fully
                # loaded subscription into creatorProcess; confirm before changing.
                tempSubscription = Subscription(id = wmbsSubscription['id'])

                nameDictList = []
                for wmbsJobGroup in wmbsJobGroups:
                    # For each jobGroup, put a dictionary
                    # together and run it with creatorProcess
                    jobsInGroup = len(wmbsJobGroup.jobs)
                    wmbsJobGroup.subscription = tempSubscription
                    tempDict = {}
                    tempDict.update(processDict)
                    tempDict['jobGroup'] = wmbsJobGroup
                    tempDict['swVersion'] = wmTask.getSwVersion()
                    tempDict['scramArch'] = wmTask.getScramArch()
                    tempDict['jobNumber'] = jobNumber
                    tempDict['agentNumber'] = self.agentNumber

                    jobGroup = creatorProcess(work = tempDict,
                                              jobCacheDir = self.jobCacheDir)
                    # Advance the counter so job numbers stay unique across groups.
                    jobNumber += jobsInGroup

                    # Set jobCache for group
                    for job in jobGroup.jobs:
                        nameDictList.append({'jobid':job['id'], 'cacheDir':job['cache_dir']})
                        job["user"] = wmWorkload.getOwner()["name"]
                        job["group"] = wmWorkload.getOwner()["group"]

                # Set the caches in the database
                try:
                    if len(nameDictList) > 0:
                        self.setBulkCache.execute(jobDictList = nameDictList,
                                                  conn = myThread.transaction.conn,
                                                  transaction = True)
                except WMException:
                    raise
                except Exception, ex:
                    msg = "Unknown exception while setting the bulk cache:\n"
                    msg += str(ex)
                    logging.error(msg)
                    self.sendAlert(6, msg = msg)
                    logging.debug("Error while setting bulkCache with following values: %s\n" % nameDictList)
                    raise JobCreatorException(msg)

                # Advance the jobGroup in changeState
                for wmbsJobGroup in wmbsJobGroups:
                    self.advanceJobGroup(wmbsJobGroup = wmbsJobGroup)

                # Now end the transaction so that everything is wrapped
                # in a single rollback
                myThread.transaction.commit()
def testOutput(self): """ _testOutput_ Creat a workflow and add some outputs to it. Verify that these are stored to and loaded from the database correctly. """ testFilesetA = Fileset(name="testFilesetA") testMergedFilesetA = Fileset(name="testMergedFilesetA") testFilesetB = Fileset(name="testFilesetB") testMergedFilesetB = Fileset(name="testMergedFilesetB") testFilesetC = Fileset(name="testFilesetC") testMergedFilesetC = Fileset(name="testMergedFilesetC") testFilesetA.create() testFilesetB.create() testFilesetC.create() testMergedFilesetA.create() testMergedFilesetB.create() testMergedFilesetC.create() testWorkflowA = Workflow(spec="spec.xml", owner="Simon", name="wf001", task='Test') testWorkflowA.create() testWorkflowB = Workflow(name="wf001", task='Test') testWorkflowB.load() self.assertEqual(len(testWorkflowB.outputMap.keys()), 0, "ERROR: Output map exists before output is assigned") testWorkflowA.addOutput("outModOne", testFilesetA, testMergedFilesetA) testWorkflowA.addOutput("outModOne", testFilesetC, testMergedFilesetC) testWorkflowA.addOutput("outModTwo", testFilesetB, testMergedFilesetB) testWorkflowC = Workflow(name="wf001", task='Test') testWorkflowC.load() self.assertEqual(len(testWorkflowC.outputMap.keys()), 2, "ERROR: Incorrect number of outputs in output map") self.assertTrue("outModOne" in testWorkflowC.outputMap.keys(), "ERROR: Output modules missing from workflow output map") self.assertTrue("outModTwo" in testWorkflowC.outputMap.keys(), "ERROR: Output modules missing from workflow output map") for outputMap in testWorkflowC.outputMap["outModOne"]: if outputMap["output_fileset"].id == testFilesetA.id: self.assertEqual(outputMap["merged_output_fileset"].id, testMergedFilesetA.id, "Error: Output map incorrectly maps filesets.") else: self.assertEqual(outputMap["merged_output_fileset"].id, testMergedFilesetC.id, "Error: Output map incorrectly maps filesets.") self.assertEqual(outputMap["output_fileset"].id, testFilesetC.id, "Error: Output map 
incorrectly maps filesets.") self.assertEqual(testWorkflowC.outputMap["outModTwo"][0]["merged_output_fileset"].id, testMergedFilesetB.id, "Error: Output map incorrectly maps filesets.") self.assertEqual(testWorkflowC.outputMap["outModTwo"][0]["output_fileset"].id, testFilesetB.id, "Error: Output map incorrectly maps filesets.") return