Example no. 1
    def testMonteCarlo(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

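        # Inject the workload into WMBS: create the top-level fileset and subscribe the Production task to it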
        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Production",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

        return
Example no. 2
    def testSingleStepReDigi(self):
        """
        _testSingleStepReDigi_

        Verify that a single step ReDigi workflow can be created and installed
        correctly into WMBS.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[2]
        defaultArguments["StepTwoConfigCacheID"] = None
        defaultArguments["StepThreeConfigCacheID"] = None

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)

        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.verifyKeepAOD()
        return
Example no. 3
    def testThreeStepChainedReDigi(self):
        """
        _testThreeStepChainedReDigi_

        Verify that a chained ReDigi workflow that discards RAW and RECO data
        can be created and installed into WMBS correctly.  
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["KeepStepOneOutput"] = False
        defaultArguments["KeepStepTwoOutput"] = False

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        self.verifyKeepAOD()
        return    
Example no. 4
    def testChainedReDigi(self):
        """
        _testChainedReDigi_

        Verify that a chained ReDigi workflow that discards RAW data can be
        created and installed into WMBS correctly.  This will only verify the
        step one/step two information in WMBS as the step three information is
        the same as the dependent workflow.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["KeepStepOneOutput"] = False

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.verifyDiscardRAW()
        return
Example no. 5
    def testThreeStepChainedReDigi(self):
        """
        _testThreeStepChainedReDigi_

        Verify that a chained ReDigi workflow that discards RAW and RECO data
        can be created and installed into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["KeepStepOneOutput"] = False
        defaultArguments["KeepStepTwoOutput"] = False

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

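        # With the intermediate outputs dropped, the chain collapses into a single top-level task whose cmsRun2 step keeps two output modules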
        self.assertTrue(len(testWorkload.getTopLevelTask()) == 1,
                        "Error: Wrong number of top level tasks.")
        topLevelTask = testWorkload.getTopLevelTask()[0]
        topLevelStep = topLevelTask.steps()
        cmsRun2Step = topLevelStep.getStep("cmsRun2").getTypeHelper()
        self.assertTrue(len(cmsRun2Step.listOutputModules()) == 2,
                        "Error: Wrong number of output modules in cmsRun2.")

        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.verifyKeepAOD()
        return
Example no. 6
    def testCombinedReDigiRecoConfig(self):
        """
        _testCombinedReDigiRecoConfig_

        Verify that a ReDigi workflow that uses a single step one config
        installs into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs(combinedStepOne = True)
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[2]
        defaultArguments["StepThreeConfigCacheID"] = None
        defaultArguments["StepOneOutputModuleName"] = "RECODEBUGoutput"

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)

        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.verifyDiscardRAW()
        return
Example no. 7
    def testRelValMCWithPileup(self):
        """
        Configure, instantiate, install into WMBS and check that the
        subscriptions in WMBS are set up correctly.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "relvalmc_t"

        defaultArguments["GenOutputModuleName"] = "OutputA"
        defaultArguments["StepOneOutputModuleName"] = "OutputB"
        defaultArguments["GenConfigCacheID"] = self.injectGenerationConfig()
        defaultArguments["StepOneConfigCacheID"] = self.injectStepOneConfig()
        defaultArguments["StepTwoConfigCacheID"] = self.injectStepTwoConfig()

        # add pileup information for the generation task
        defaultArguments["PileupConfig"] = {"cosmics": ["/some/cosmics/dataset1","/some/cosmics/dataset2"],
                                            "minbias": ["/some/minbias/dataset3"]}

        testWorkload = relValMCWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Generation", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # now run the tests on the single workload instance installed into WMBS;
        # each subtest deals with a specific task
        self._generationTaskTest()
        self._stepOneTaskTest()
        self._stepTwoTaskTest()
Example no. 8
    def testCombinedReDigiRecoConfig(self):
        """
        _testCombinedReDigiRecoConfig_

        Verify that a ReDigi workflow that uses a single step one config
        installs into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs(combinedStepOne = True)
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[2]
        defaultArguments["StepThreeConfigCacheID"] = None
        defaultArguments["StepOneOutputModuleName"] = "RECODEBUGoutput"

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        self.verifyDiscardRAW()
        return    
Example no. 9
    def testRelValMCWithPileup(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.
        The input configuration includes pileup input files.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

        # add pileup configuration
        defaultArguments["PileupConfig"] = {
            "mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "data": ["/some/minbias/dataset3"]
        }

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Production",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

        return
Example no. 10
    def testRelValMCWithPileup(self):
        """
        Configure, instantiate, install into WMBS and check that the
        subscriptions in WMBS are set up correctly.
        
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "relvalmc_t"

        defaultArguments["GenOutputModuleName"] = "OutputA"
        defaultArguments["StepOneOutputModuleName"] = "OutputB"
        defaultArguments["GenConfigCacheID"] = self.injectGenerationConfig()
        defaultArguments["StepOneConfigCacheID"] = self.injectStepOneConfig()
        defaultArguments["StepTwoConfigCacheID"] = self.injectStepTwoConfig()

        # add pileup information for the generation task
        defaultArguments["PileupConfig"] = {
            "cosmics": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
            "minbias": ["/some/minbias/dataset3"],
        }

        testWorkload = relValMCWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        # now run the tests on the single workload instance installed into WMBS;
        # each subtest deals with a specific task
        self._generationTaskTest()
        self._stepOneTaskTest()
        self._stepTwoTaskTest()
Example no. 11
    def testRelValMC(self):
        """
        Configure, instantiate, install into WMBS and check that the
        subscriptions in WMBS are set up correctly.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "relvalmc_t"
        defaultArguments["GenOutputModuleName"] = "OutputA"
        defaultArguments["StepOneOutputModuleName"] = "OutputB"
        defaultArguments["GenConfigCacheID"] = self.injectGenerationConfig()
        defaultArguments["StepOneConfigCacheID"] = self.injectStepOneConfig()
        defaultArguments["StepTwoConfigCacheID"] = self.injectStepTwoConfig()

        testWorkload = relValMCWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Generation", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        # now run the tests on the single workload instance installed into WMBS;
        # each subtest deals with a specific task
        self._generationTaskTest()
        self._stepTwoTaskTest()
        self._stepOneTaskTest()
        return
Example no. 12
    def testRelValMCWithPileup(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.
        The input configuration includes pileup input files.        
        
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()

        # add pileup configuration
        defaultArguments["PileupConfig"] = {"mc": ["/some/cosmics/dataset1", "/some/cosmics/dataset2"],
                                            "data": ["/some/minbias/dataset3"]}

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

        return
Example no. 13
    def testMonteCarloExtension(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test. This uses a non-zero first
        event and lumi. Check that the splitting arguments are correctly
        set for the lfn counter.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()
        defaultArguments["FirstEvent"] = 3571428573
        defaultArguments["FirstLumi"] = 26042
        defaultArguments["TimePerEvent"] = 15
        defaultArguments["FilterEfficiency"] = 0.014
        defaultArguments["TotalTime"] = 28800

        initial_lfn_counter = 26042  # previous number of jobs + 1, which equals the first lumi

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Production",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

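        # Both the production task and its merge tasks should carry the initial LFN counter in their job splitting parameters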
        productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
        productionSplitting = productionTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in productionSplitting,
                        "No initial lfn counter was stored")
        self.assertEqual(productionSplitting["initial_lfn_counter"],
                         initial_lfn_counter, "Wrong initial LFN counter")

        for outputMod in ["OutputA", "OutputB"]:
            mergeTask = testWorkload.getTaskByPath(
                '/TestWorkload/Production/ProductionMerge%s' % outputMod)
            mergeSplitting = mergeTask.jobSplittingParameters()
            self.assertTrue("initial_lfn_counter" in mergeSplitting,
                            "No initial lfn counter was stored")
            self.assertEqual(mergeSplitting["initial_lfn_counter"],
                             initial_lfn_counter, "Wrong initial LFN counter")

        return
Example no. 14
    def testMonteCarlo(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test.        
        
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"        
        defaultArguments["ProdConfigCacheID"] = self.injectMonteCarloConfig()

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()
        
        self._commonMonteCarloTest()
Example no. 15
    def testMonteCarloExtension(self):
        """
        Create a Monte Carlo workflow and verify that it is injected correctly
        into WMBS and invoke its detailed test. This uses a non-zero first
        event and lumi. Check that the splitting arguments are correctly
        set for the lfn counter.

        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ProcConfigCacheID"] = self.injectMonteCarloConfig()
        defaultArguments["FirstEvent"] = 3571428573
        defaultArguments["FirstLumi"] = 26042
        defaultArguments["TimePerEvent"] = 15
        defaultArguments["FilterEfficiency"] = 0.014
        defaultArguments["TotalTime"] = 28800

        initial_lfn_counter = 26042  # previous number of jobs + 1, which equals the first lumi

        testWorkload = monteCarloWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()

        productionTask = testWorkload.getTaskByPath('/TestWorkload/Production')
        productionSplitting = productionTask.jobSplittingParameters()
        self.assertTrue("initial_lfn_counter" in productionSplitting, "No initial lfn counter was stored")
        self.assertEqual(productionSplitting["initial_lfn_counter"], initial_lfn_counter, "Wrong initial LFN counter")

        for outputMod in ["OutputA", "OutputB"]:
            mergeTask = testWorkload.getTaskByPath('/TestWorkload/Production/ProductionMerge%s' % outputMod)
            mergeSplitting = mergeTask.jobSplittingParameters()
            self.assertTrue("initial_lfn_counter" in mergeSplitting, "No initial lfn counter was stored")
            self.assertEqual(mergeSplitting["initial_lfn_counter"], initial_lfn_counter, "Wrong initial LFN counter")

        return
Example no. 16
    def testLHEStepZero(self):
        """
        _testLHEStepZero_
        Make sure that the workload can be created
        and that it passes the common Monte Carlo test.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

        testWorkload = lheStepZeroWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Production", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()
Example no. 17
    def testChainedReDigi(self):
        """
        _testChainedReDigi_

        Verify that a chained ReDigi workflow that discards RAW data can be
        created and installed into WMBS correctly.  This will only verify the
        step one/step two information in WMBS as the step three information is
        the same as the dependent workflow.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]
        defaultArguments["KeepStepOneOutput"] = False

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        # Verify that pileup is configured for both of the cmsRun steps in the
        # top level task.
        topLevelTask = testWorkload.getTopLevelTask()[0]
        cmsRun1Helper = topLevelTask.getStepHelper("cmsRun1")
        cmsRun2Helper = topLevelTask.getStepHelper("cmsRun2")
        cmsRun1PileupConfig = cmsRun1Helper.getPileup()
        cmsRun2PileupConfig = cmsRun2Helper.getPileup()

        self.assertTrue(cmsRun1PileupConfig.mc.dataset, "/some/cosmics/dataset")
        self.assertTrue(cmsRun2PileupConfig.mc.dataset, "/some/cosmics/dataset")
        
        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        self.verifyDiscardRAW()
        return
Example no. 18
    def testLHEStepZero(self):
        """
        _testLHEStepZero_
        Make sure that the workload can be created
        and that it passes the common Monte Carlo test.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "rereco_t"
        defaultArguments["ConfigCacheID"] = self.injectMonteCarloConfig()

        testWorkload = lheStepZeroWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "Production",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        self._commonMonteCarloTest()
Example no. 19
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly. Check that we can drop an output module.
        """
        testArgs = getTestArguments()
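        # Mark RECOoutput as transient so no merge task is created for it in WMBS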
        testArgs['TransientOutputModules'] = ['RECOoutput']
        testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock", cachepath = self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

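        # The top-level DataProcessing workflow should expose RECOoutput, ALCARECOoutput and logArchive in its output map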
        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod in testArgs["TransientOutputModules"]:
                self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            else:
                self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            if goldenOutputMod in testArgs["TransientOutputModules"]:
                # No merge for this output module
                continue
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-RECOoutput")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        # No merge workflow should exist in WMBS
        self.assertRaises(IndexError, recoMergeWorkflow.load)

        unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

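        # Each unmerged output fileset should have a SiblingProcessingBased cleanup subscription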
        for procOutput in ["RECOoutput", "ALCARECOoutput"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Example no. 20
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
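        # Register two sites in ResourceControl so the subscription's site white/black list can be checked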
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

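        # The workflow spec stored in WMBS should point at the pickled sandbox under the cache directory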
        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(
            procWorkflow.spec,
            os.path.join(self.workDir, procWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0][
            "output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(
            mergedProcOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name,
                         "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name="TestWorkload",
                                 task="/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(
            unmergedMergeOutput.name,
            "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
            "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset=unmergedProcOutput,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
Example no. 21
    def testPrivateMC(self):
        """
        _testPrivateMC_
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "privatemc_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 1

        processingFactory = PrivateMCWorkloadFactory()
        testWorkload = processingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)
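        # Load the injected PrivateMC workflow back from WMBS and check its output map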
        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/PrivateMC")
        procWorkflow.load()
        self.assertEqual(
            len(procWorkflow.outputMap.keys()), 3,
            "Error: Wrong number of WF outputs: %s" %
            len(procWorkflow.outputMap.keys()))

        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]  # PrivateMC, like Analysis, has no merge task
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-PrivateMC-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "PrivateMC",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/PrivateMC/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload", task="/TestWorkload/PrivateMC/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
Example no. 22
    def testCreateSubscription(self):
        """
        _testCreateSubscription_

        Verify that the subscription creation code works correctly.
        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")        

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask")
        procWorkflow.load()

        self.assertEqual(procWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner: %s" % procWorkflow.owner)
        self.assertEqual(procWorkflow.group, "DMWM",
                         "Error: Wrong group: %s" % procWorkflow.group)
        self.assertEqual(procWorkflow.wfType, "TestReReco",
                         "Error: Wrong type.")
        self.assertEqual(procWorkflow.spec, os.path.join(self.workDir, procWorkflow.name,
                                                         "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(procWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        mergedProcOutput = procWorkflow.outputMap["OutputA"][0]["merged_output_fileset"]
        unmergedProcOutput = procWorkflow.outputMap["OutputA"][0]["output_fileset"]

        mergedProcOutput.loadData()
        unmergedProcOutput.loadData()

        self.assertEqual(mergedProcOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedProcOutput.name, "/TestWorkload/ProcessingTask/unmerged-OutputA",
                         "Error: Unmerged output fileset is wrong.")

        mergeWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/ProcessingTask/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "TestWorkload",
                                 task = "/TestWorkload/ProcessingTask/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                            "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")        

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/ProcessingTask/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/ProcessingTask/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                        "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name, "/TestWorkload/ProcessingTask/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-ProcessingTask-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(len(procSubscription.getWhiteBlackList()), 2,
                         "Error: Wrong site white/black list for proc sub.")
        for site in procSubscription.getWhiteBlackList():
            if site["site_name"] == "site1":
                self.assertEqual(site["valid"], 1,
                                 "Error: Site should be white listed.")
            else:
                self.assertEqual(site["valid"], 0,
                                 "Error: Site should be black listed.")                

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        mergeSubscription = Subscription(fileset = unmergedProcOutput, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")        

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")
        return
Example no. 23
    def testAnalysis(self):
        """
        _testAnalysis_
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchUrl"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "analysis_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 'v1'

        analysisProcessingFactory = AnalysisWorkloadFactory()
        testWorkload = analysisProcessingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "Analysis", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name = "TestWorkload",
                              task = "/TestWorkload/Analysis")
        procWorkflow.load()
        self.assertEqual(len(procWorkflow.outputMap.keys()), 2,
                                  "Error: Wrong number of WF outputs.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]  # Analysis does not have a merge task
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Analysis/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        output = procWorkflow.outputMap["output"][0]["output_fileset"]
        mergedOutput = procWorkflow.outputMap["output"][0]["merged_output_fileset"]
        output.loadData()
        mergedOutput.loadData()
        self.assertEqual(output.name, "/TestWorkload/Analysis/unmerged-output",
                             "Error: Unmerged output fileset is wrong: " + output.name)
        self.assertEqual(mergedOutput.name, "/TestWorkload/Analysis/unmerged-output",
                             "Error: Unmerged output fileset is wrong: " + mergedOutput.name)

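        # The top-level fileset created by WMBSHelper is named <workload>-<task>-<block>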
        topLevelFileset = Fileset(name = "TestWorkload-Analysis-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "Analysis",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")


        procLogCollect = Fileset(name = "/TestWorkload/Analysis/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Analysis/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
Exemplo n.º 24
0
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay < now
      AND run.end_time > 0 )

    Create workflows and subscriptions for the processing
    of runs/datasets.

    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname = "RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
    insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")
    releasePromptRecoDAO = daoFactory(classname = "RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")

    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)
    markWorkflowsInjectedDAO   = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

    #
    # for creating PromptReco specs
    #
    recoSpecs = {}

    #
    # for PhEDEx subscription settings
    #
    subscriptions = []

    findRecoReleaseDAO = daoFactory(classname = "RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction = False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
        runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

        # retrieve phedex configs for run
        getPhEDExConfigDAO = daoFactory(classname = "RunConfig.GetPhEDExConfig")
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction = False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            bindsReleasePromptReco.append( { 'RUN' : run,
                                             'PRIMDS' : dataset,
                                             'NOW' : int(time.time()) } )

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : dataset,
                                           'SCENARIO' : datasetConfig.Scenario } )

            if datasetConfig.CMSSWVersion != None:
                bindsCMSSWVersion.append( { 'VERSION' : datasetConfig.CMSSWVersion } )

            alcaSkim = None
            if len(datasetConfig.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.DqmSequences)

            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(datasetConfig.CMSSWVersion,
                                                                         tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append( { 'RUN' : run,
                                      'PRIMDS' : dataset,
                                      'DO_RECO' : int(datasetConfig.DoReco),
                                      'RECO_SPLIT' : datasetConfig.RecoSplit,
                                      'WRITE_RECO' : int(datasetConfig.WriteRECO),
                                      'WRITE_DQM' : int(datasetConfig.WriteDQM),
                                      'WRITE_AOD' : int(datasetConfig.WriteAOD),
                                      'PROC_VER' : datasetConfig.ProcessingVersion,
                                      'ALCA_SKIM' : alcaSkim,
                                      'DQM_SEQ' : dqmSeq,
                                      'BLOCK_DELAY' : datasetConfig.BlockCloseDelay,
                                      'CMSSW' : datasetConfig.CMSSWVersion,
                                      'SCRAM_ARCH' : datasetConfig.ScramArch,
                                      'MULTICORE' : datasetConfig.Multicore,
                                      'GLOBAL_TAG' : datasetConfig.GlobalTag } )

            phedexConfig = phedexConfigs[dataset]
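
            # PhEDEx subscription rules applied below: AOD gets a custodial copy on the
            # tape node plus a non-custodial, auto-approved copy on the disk node;
            # ALCARECO and DQM get a custodial tape copy only; RECO gets a non-custodial,
            # auto-approved disk copy only.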

            if datasetConfig.WriteAOD:

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if phedexConfig['tape_node'] != None:
                    custodialSites.append(phedexConfig['tape_node'])
                if phedexConfig['disk_node'] != None:
                    nonCustodialSites.append(phedexConfig['disk_node'])
                    autoApproveSites.append(phedexConfig['disk_node'])

                subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "AOD" } )

            if len(datasetConfig.AlcaSkims) > 0:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append( { 'custodialSites' : [phedexConfig['tape_node']],
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : [],
                                            'autoApproveSites' : [],
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "ALCARECO" } )
            if datasetConfig.WriteDQM:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append( { 'custodialSites' : [phedexConfig['tape_node']],
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : [],
                                            'autoApproveSites' : [],
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "DQM" } )

            if datasetConfig.WriteRECO:
                if phedexConfig['disk_node'] != None:
                    subscriptions.append( { 'custodialSites' : [],
                                            'nonCustodialSites' : [phedexConfig['disk_node']],
                                            'autoApproveSites' : [phedexConfig['disk_node']],
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "RECO" } )

            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append("DQM")
            if len(datasetConfig.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            if datasetConfig.DoReco and len(writeTiers) > 0:

                #
                # create WMSpec
                #
                taskName = "Reco"
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = {}

                specArguments['Group'] = "unknown"
                specArguments['Requestor'] = "unknown"
                specArguments['RequestorDN'] = "unknown"

                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800

                if datasetConfig.Multicore:
                    specArguments['Multicore'] = datasetConfig.Multicore
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore

                specArguments['RequestPriority'] = 0

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch

                specArguments['RunNumber'] = run

                specArguments['SplittingAlgo'] = "EventBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit

                specArguments['ProcessingString'] = "PromptReco"
                specArguments['ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario

                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments['GlobalTagConnect'] = datasetConfig.GlobalTagConnect

                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (dataset, runInfo['acq_era'], repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences

                specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
                if runInfo['backfill']:
                    specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                                runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

                specArguments['ValidStatus'] = "VALID"

                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                specArguments['BlockCloseDelay'] = datasetConfig.BlockCloseDelay

                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"

                # not used, but needed by the validation
                specArguments['CouchURL'] = "http://*****:*****@cern.ch", "T0",
                                       { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                         'dn' : "*****@*****.**" } )

                wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                                  softTimeout = 604800, gracePeriod = 3600)

                wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)
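
    # All database updates for this release happen in a single transaction: the
    # run/dataset configuration first, then the PromptReco release itself, then the
    # WMBS filesets/subscriptions; any error rolls the whole release back before
    # raising a RuntimeError.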

    try:
        myThread.transaction.begin()
        if len(bindsDatasetScenario) > 0:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
        if len(bindsCMSSWVersion) > 0:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
        if len(bindsRecoConfig) > 0:
            insertRecoConfigDAO.execute(bindsRecoConfig, conn = myThread.transaction.conn, transaction = True)
        if len(bindsStorageNode) > 0:
            insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
        if len(bindsReleasePromptReco) > 0:
            releasePromptRecoDAO.execute(bindsReleasePromptReco, conn = myThread.transaction.conn, transaction = True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName), Fileset(id = fileset), alternativeFilesetClose = True)
            insertWorkflowMonitoringDAO.execute([fileset],  conn = myThread.transaction.conn, transaction = True)
        if len(recoSpecs) > 0:
            markWorkflowsInjectedDAO.execute(recoSpecs.keys(), injected = True, conn = myThread.transaction.conn, transaction = True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError("Problem in releasePromptReco() database transaction !")
    else:
        myThread.transaction.commit()

    return
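
A minimal usage sketch for the function above: the Tier0Feeder component would typically load the Tier-0 configuration file and call releasePromptReco() from its polling loop. The configuration loader and the file paths below are assumptions for illustration only, not taken from this snippet.

# Sketch, assuming the WMCore configuration loader; all paths are placeholders.
from WMCore.Configuration import loadConfigurationFile

tier0Config = loadConfigurationFile("/data/tier0/admin/ProdOfflineConfiguration.py")
releasePromptReco(tier0Config,
                  specDirectory = "/data/tier0/admin/Specs",
                  dqmUploadProxy = "/data/certs/serviceproxy-vobox.pem")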
Exemplo n.º 25
0
    def testTier1PromptReco(self):
        """
        _testTier1PromptReco_

        Create a T1 Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = getTestArguments()

        testWorkload = tier1promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()), len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = ["write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                                 "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name = "TestWorkload",
                                    task = "/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(len(alcaSkimWorkflow.outputMap.keys()), len(testArguments["AlcaSkims"]) + 1,
                        "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(mergedOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                              "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/RecoMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs %d." % len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset = topLevelFileset, workflow = recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(recoSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algorithm. %s" % recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(name = "/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset = alcaRecoFileset, workflow = alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algorithm. %s" % alcaSkimSubscription["split_algo"])

        mergedRecoFileset = Fileset(name = "/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
        mergedRecoFileset.loadData()

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedDataTier, workflow = dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(name = "TestWorkload",
                                             task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" % unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmergedAlcaSkim, workflow = alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                             "Error: Wrong split algorithm. %s" % mergeSubscription["split_algo"])

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoCleanupUnmerged%s" % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmergedFileset, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        recoLogCollect = Fileset(name = "/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = recoLogCollect, workflow = recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(name = "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" % goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = recoMergeLogCollect, workflow = recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(name = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive" % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(name = "TestWorkload",
                                       task = "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect" % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(fileset = alcaSkimLogCollect, workflow = alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        return
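
The checks above repeat a single pattern many times: load a fileset by name, load a workflow by task path, load the subscription joining them, then assert its type and split algorithm. A small helper along these lines (a sketch, not part of the original test suite) would express each check in one call:

    def assertWMBSSubscription(self, filesetName, taskPath, subType, splitAlgo):
        """
        Sketch of a helper: verify the WMBS subscription joining a fileset and a task.
        """
        fileset = Fileset(name = filesetName)
        fileset.loadData()
        workflow = Workflow(name = "TestWorkload", task = taskPath)
        workflow.load()
        subscription = Subscription(fileset = fileset, workflow = workflow)
        subscription.loadData()
        self.assertEqual(subscription["type"], subType,
                         "Error: Wrong subscription type.")
        self.assertEqual(subscription["split_algo"], splitAlgo,
                         "Error: Wrong split algorithm.")

    # e.g. self.assertWMBSSubscription("/TestWorkload/Reco/unmerged-logArchive",
    #                                  "/TestWorkload/Reco/LogCollect",
    #                                  "LogCollect", "MinFileBased")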
Exemplo n.º 26
0
        factory = TaskChainWorkloadFactory()
        try:
            self.workload = factory("YankingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)

        self.workload.setSpecUrl("somespec")
        self.workload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(self.workload,
                                    "DigiHLT",
                                    "SomeBlock",
                                    cachepath=self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        self._checkTask(
            self.workload.getTaskByPath("/YankingTheChain/DigiHLT"),
            arguments['Task1'])
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"),
            arguments['Task2'])
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"
            ), arguments['Task3'])
        self._checkTask(
            self.workload.getTaskByPath(
                "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"
Exemplo n.º 27
0
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly.
        """
        testWorkload = dataProcessingWorkload("TestWorkload", getTestArguments())
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")

        unmergedAlca = Fileset(name = "/TestWorkload/DataProcessing/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo.")        

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/DataProcessingoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputALCARECOALCARECO/DataProcessingoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                         "Error: Wrong split algo.")                

        return
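
The fileset names asserted throughout these tests follow the WMBS naming convention visible above: unmerged outputs live at /<Workload>/<TaskPath>/unmerged-<module>, and where the merge task follows the <Task>Merge<module> pattern the merged outputs live at /<Workload>/<TaskPath>/<Task>Merge<module>/merged-Merged. A small hypothetical helper (not part of the tests) that derives the expected names:

def expectedFilesetNames(workload, taskPath, taskName, outputModule):
    """
    Sketch: build the unmerged/merged fileset names the tests above assert against.
    """
    unmerged = "/%s/%s/unmerged-%s" % (workload, taskPath, outputModule)
    merged = "/%s/%s/%sMerge%s/merged-Merged" % (workload, taskPath, taskName, outputModule)
    return unmerged, merged

# expectedFilesetNames("TestWorkload", "DataProcessing", "DataProcessing", "outputRECORECO")
# -> ("/TestWorkload/DataProcessing/unmerged-outputRECORECO",
#     "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")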
Exemplo n.º 28
0
    def testDependentReDigi(self):
        """
        _testDependentReDigi_

        Verify that a dependent ReDigi workflow that keeps and stages out
        RAW data is created and installed into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "StepOneProc", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
        stepOneUnmergedRAWFileset.loadData()
        stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
        stepOneMergedRAWFileset.loadData()
        stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepOneLogArchiveFileset.loadData()
        stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
        stepOneMergeLogArchiveFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
        stepThreeUnmergedAODFileset.loadData()
        stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
        stepThreeMergedAODFileset.loadData()
        stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
        stepThreeLogArchiveFileset.loadData()

        stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
        stepThreeMergeLogArchiveFileset.loadData()

        stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepOneWorkflow.load()
        self.assertEqual(stepOneWorkflow.wfType, 'redigi')
        self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")

        for outputMod in stepOneWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
        stepOneSub.loadData()
        self.assertEqual(stepOneSub["type"], "Processing",
                         "Error: Step one sub has wrong type.")

        stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
        stepOneCleanupWorkflow.load()
        self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneCleanupSub.loadData()
        self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                         "Error: Step one sub has wrong type.")

        stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepOneLogCollectWorkflow.load()
        self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset)
        stepOneLogCollectSub.loadData()
        self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                         "Error: Step one sub has wrong type.")

        stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
        stepOneMergeWorkflow.load()
        self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        for outputMod in stepOneMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneMergeSub.loadData()
        self.assertEqual(stepOneMergeSub["type"], "Merge",
                         "Error: Step one sub has wrong type.")

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                     task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
        stepThreeWorkflow.load()
        self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
        stepThreeSub.loadData()
        self.assertEqual(stepThreeSub["type"], "Processing",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
        stepThreeCleanupWorkflow.load()
        self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeCleanupSub.loadData()
        self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                         "Error: Step three sub has wrong type.")

        stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
        stepThreeLogCollectWorkflow.load()
        self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset)
        stepThreeLogCollectSub.loadData()
        self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                         "Error: Step three sub has wrong type.")

        stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
        stepThreeMergeWorkflow.load()
        self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeMergeSub.loadData()
        self.assertEqual(stepThreeMergeSub["type"], "Merge",
                         "Error: Step three sub has wrong type.")
        for outputMod in stepThreeMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        return
Exemplo n.º 29
0
        }
    
        factory = TaskChainWorkloadFactory()        
        try:
            self.workload = factory("YankingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)


        self.workload.setSpecUrl("somespec")
        self.workload.setOwnerDetails("*****@*****.**", "DMWM")


        testWMBSHelper = WMBSHelper(self.workload, "SomeBlock")
        testWMBSHelper.createSubscription()


        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco"), arguments['Task2'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco/ALCAReco"), arguments['Task3'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco/Skims"), arguments['Task4'])
 
        digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
        digiStep = digi.getStepHelper("cmsRun1")
        self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
 
        reco = self.workload.getTaskByPath("/YankingTheChain/DigiHLT/Reco")
        recoStep = reco.getStepHelper("cmsRun1")
        self.assertEqual(recoStep.getGlobalTag(), arguments['Task2']['GlobalTag'])
 
Exemplo n.º 30
0
    def testReRecoDroppingRECO(self):
        """
        _testReRecoDroppingRECO_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on. This test runs on the unmerged RECO output.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments['ProcessingString'] = 'ProcString'
        dataProcArguments['ConfigCacheID'] = recoConfig
        dataProcArguments["SkimConfigs"] = [{
            "SkimName": "SomeSkim",
            "SkimInput": "RECOoutput",
            "SkimSplitAlgo": "FileBased",
            "SkimSplitArgs": {
                "files_per_job": 1,
                "include_parents": True
            },
            "ConfigCacheID": skimConfig,
            "Scenario": None
        }]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"
        dataProcArguments["TransientOutputModules"] = ["RECOoutput"]

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.\
                         SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "DataProcessing",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)
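        # The top-level subscription triggers creation of the skim workflow and
        # its merge/cleanup/log-collect companions in WMBS; the checks below
        # load each of them and verify the filesets and subscription types.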

        skimWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/unmerged-%s" %
                goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/DataProcessing/SomeSkim/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s"
                % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmerged,
                                             workflow=mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"],
                             "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/SomeSkim/unmerged-Skim%s" %
                skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimCleanupUnmergedSkim%s"
                % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive"
                % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect"
                % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset=skimMergeLogCollect,
                                         workflow=skimMergeLogCollectWorkflow)
            logCollectSub.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        return
Exemplo n.º 31
0
    def testDataProcessing(self):
        """
        _testDataProcessing_

        Create a data processing workflow and verify it installs into WMBS
        correctly. Check that we can drop an output module.
        """
        testArgs = getTestArguments()
        testArgs['TransientOutputModules'] = ['RECOoutput']
        testWorkload = dataProcessingWorkload("TestWorkload", testArgs)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload,
                                    "DataProcessing",
                                    "SomeBlock",
                                    cachepath=self.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/DataProcessing")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["RECOoutput", "ALCARECOoutput"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod in testArgs["TransientOutputModules"]:
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/DataProcessing/unmerged-%s" %
                    goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            else:
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged"
                    % goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/DataProcessing/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/DataProcessing/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            if goldenOutputMod in testArgs["TransientOutputModules"]:
                # No merge for this output module
                continue
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/DataProcessing/DataProcessingMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/DataProcessing/DataProcessingMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-DataProcessing-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-RECOoutput")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput")
        # No merge workflow should exist in WMBS
        self.assertRaises(IndexError, recoMergeWorkflow.load)

        unmergedAlca = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-ALCARECOoutput")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset=unmergedAlca,
                                         workflow=alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"],
                         "ParentlessMergeBySize", "Error: Wrong split algo.")

        for procOutput in ["RECOoutput", "ALCARECOoutput"]:
            unmerged = Fileset(
                name="/TestWorkload/DataProcessing/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/DataProcessing/DataProcessingCleanupUnmerged%s"
                % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmerged,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name="/TestWorkload/DataProcessing/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/DataProcessing/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(
            name=
            "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/merged-logArchive"
        )
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/DataProcessing/DataProcessingMergeALCARECOoutput/DataProcessingALCARECOoutputMergeLogCollect"
        )
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=procLogCollect,
                                     workflow=procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
Exemplo n.º 32
0
def configureRunStream(tier0Config, run, stream, specDirectory,
                       dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

    # treat centralDAQ or miniDAQ runs (which have an HLT key) differently from local runs
    if runInfo['hltkey'] != None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(
                classname="RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run,
                                                    stream,
                                                    transaction=False)
            if len(datasets) == 0:
                raise RuntimeError(
                    "Stream is not defined in HLT menu or has no datasets !")

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(
            classname="RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(
            classname="RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(
            classname="RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(
            classname="RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(
            classname="RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(
            classname="RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(
            classname="RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(
            classname="RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(
            classname="RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(
            classname="RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(
            classname="RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(
            classname="RunConfig.InsertPhEDExConfig")

        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {
            'RUN': run,
            'STREAM': stream,
            'STYLE': streamConfig.ProcessingStyle
        }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(
            classname="Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(
            classname="RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run,
                                                          stream,
                                                          transaction=False)

        if streamConfig.ProcessingStyle == "Bulk":

            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Repack.CMSSWVersion})

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Repack.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Repack.ProcessingVersion,
                'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi,
                'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi,
                'MIN_SIZE': streamConfig.Repack.MinInputSize,
                'MAX_SIZE': streamConfig.Repack.MaxInputSize,
                'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize,
                'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize,
                'MAX_EVENTS': streamConfig.Repack.MaxInputEvents,
                'MAX_FILES': streamConfig.Repack.MaxInputFiles,
                'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay,
                'CMSSW': streamConfig.Repack.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Repack.ScramArch
            }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append({'PRIMDS': specialDataset})
            bindsStreamDataset.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'STREAM': stream
            })
            bindsSpecialDataset = {'STREAM': stream, 'PRIMDS': specialDataset}
            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'SCENARIO': streamConfig.Express.Scenario
            })

            if streamConfig.Express.WriteDQM:
                outputModuleDetails.append({
                    'dataTier': tier0Config.Global.DQMDataTier,
                    'eventContent': tier0Config.Global.DQMDataTier,
                    'primaryDataset': specialDataset
                })

            bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode})

            bindsPhEDExConfig.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'ARCHIVAL_NODE': None,
                'TAPE_NODE': None,
                'DISK_NODE': expressPhEDExSubscribeNode
            })

            subscriptions.append({
                'custodialSites': [],
                'nonCustodialSites': [expressPhEDExSubscribeNode],
                'autoApproveSites': [expressPhEDExSubscribeNode],
                'priority': "high",
                'primaryDataset': specialDataset
            })

            alcaSkim = None
            if len(streamConfig.Express.AlcaSkims) > 0:
                outputModuleDetails.append({
                    'dataTier': "ALCARECO",
                    'eventContent': "ALCARECO",
                    'primaryDataset': specialDataset
                })
                alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                numPromptCalibProd = 0
                for producer in streamConfig.Express.AlcaSkims:
                    if producer.startswith("PromptCalibProd"):
                        numPromptCalibProd += 1

                if numPromptCalibProd > 0:
                    bindsPromptCalibration = {
                        'RUN': run,
                        'STREAM': stream,
                        'NUM_PRODUCER': numPromptCalibProd
                    }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Express.CMSSWVersion})

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Express.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:

                bindsCMSSWVersion.append(
                    {'VERSION': streamConfig.Express.RecoCMSSWVersion})

                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(
                    streamConfig.Express.RecoCMSSWVersion,
                    tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Express.ProcessingVersion,
                'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers),
                'WRITE_DQM': streamConfig.Express.WriteDQM,
                'GLOBAL_TAG': streamConfig.Express.GlobalTag,
                'MAX_RATE': streamConfig.Express.MaxInputRate,
                'MAX_EVENTS': streamConfig.Express.MaxInputEvents,
                'MAX_SIZE': streamConfig.Express.MaxInputSize,
                'MAX_FILES': streamConfig.Express.MaxInputFiles,
                'MAX_LATENCY': streamConfig.Express.MaxLatency,
                'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval,
                'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay,
                'CMSSW': streamConfig.Express.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Express.ScramArch,
                'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion,
                'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch,
                'MULTICORE': streamConfig.Express.Multicore,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq
            }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(
            classname="RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(
            run, stream, transaction=False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                if stream == "Express" and run in [
                        210114, 210116, 210120, 210121, 210178
                ]:
                    continue
                if stream == "A" and run in [216120, 216125, 216130]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append({
                    'dataTier': "RAW",
                    'eventContent': "ALL",
                    'selectEvents': selectEvents,
                    'primaryDataset': dataset
                })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': datasetConfig.ArchivalNode,
                    'TAPE_NODE': datasetConfig.TapeNode,
                    'DISK_NODE': datasetConfig.DiskNode
                })

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append(
                        {'NODE': datasetConfig.ArchivalNode})
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.TapeNode})
                    custodialSites.append(datasetConfig.TapeNode)
                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.DiskNode})
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)
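                # archival and tape nodes hold custodial copies of the RAW
                # data, the disk node a non-custodial, auto-approved copy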

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RAW"
                    })

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': "%s-Error" % dataset,
                        'dataTier': "RAW"
                    })

            elif streamConfig.ProcessingStyle == "Express":

                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in ["ALCARECO", "DQM", "DQMIO"]:

                        outputModuleDetails.append({
                            'dataTier': dataTier,
                            'eventContent': dataTier,
                            'selectEvents': selectEvents,
                            'primaryDataset': dataset
                        })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': None,
                    'TAPE_NODE': None,
                    'DISK_NODE': expressPhEDExSubscribeNode
                })

                subscriptions.append({
                    'custodialSites': [],
                    'nonCustodialSites': [expressPhEDExSubscribeNode],
                    'autoApproveSites': [expressPhEDExSubscribeNode],
                    'priority': "high",
                    'primaryDataset': dataset
                })

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800
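            # resource estimates for the Repack spec; in WMCore conventions
            # these are presumably seconds per event, kB per event and MB of RAM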

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo[
                'bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (
                    runInfo['backfill'], runInfo['bulk_data_type'])
            else:
                specArguments[
                    'MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments[
                'BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore
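                # the memory request scales linearly with the number of cores
                # requested for multicore Express processing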

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments[
                    'MergedLFNBase'] = "/store/backfill/%s/express" % runInfo[
                        'backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:

            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            wmSpec.setOwnerDetails(
                "*****@*****.**", "T0", {
                    'vogroup': 'DEFAULT',
                    'vorole': 'DEFAULT',
                    'dn': "*****@*****.**"
                })

            wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                              maxVSize=10485760,
                                              softTimeout=604800,
                                              gracePeriod=3600)
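            # softTimeout of 604800 s is one week with a one hour grace period;
            # the 10485760 maxRSS/maxVSize limits are presumably in kBytes (~10 GB)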

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(
                    bindsPromptCalibration,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset,
                                                conn=myThread.transaction.conn,
                                                transaction=True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(
                    bindsDatasetScenario,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            insertStreamStyleDAO.execute(bindsStreamStyle,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
                insertStreamFilesetDAO.execute(run,
                                               stream,
                                               filesetName,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                              fileset,
                                              alternativeFilesetClose=True)
                insertWorkflowMonitoringDAO.execute(
                    [fileset.id],
                    conn=myThread.transaction.conn,
                    transaction=True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append({
                        'RUN': run,
                        'PRIMDS': primds,
                        'FILESET': fileset,
                        'RECODELAY': promptRecoDelay[primds],
                        'RECODELAYOFFSET': promptRecoDelayOffset[primds]
                    })
                insertRecoReleaseConfigDAO.execute(
                    bindsRecoReleaseConfig,
                    conn=myThread.transaction.conn,
                    transaction=True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute(
                    [workflowName],
                    injected=True,
                    conn=myThread.transaction.conn,
                    transaction=True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError(
                "Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass
    return
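
A minimal sketch of how this helper could be invoked for a newly seen run/stream pair. The run number, stream name and filesystem paths below are illustrative placeholders, and tier0Config is assumed to be an already loaded Tier0 configuration object exposing the Streams and Global sections that configureRunStream() reads; in production the Tier0Feeder component supplies these arguments itself.

# Hypothetical call site; every literal value below is an illustrative placeholder.
# tier0Config is assumed to be a loaded Tier0 configuration object providing the
# tier0Config.Streams and tier0Config.Global sections used by configureRunStream().
configureRunStream(tier0Config,
                   run=239754,                                      # made-up run number
                   stream="Express",                                # stream name as seen by the feeder
                   specDirectory="/data/tier0/admin/Specs",         # where workload specs are cached
                   dqmUploadProxy="/data/certs/serviceproxy.pem")   # proxy passed through for DQM uploads
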
Exemplo n.º 33
0
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (which have an HLT key) differently from local runs
    #
    if runInfo['hltkey'] != None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False)
            if len(datasets) == 0:
                raise RuntimeError("Stream is not defined in HLT menu or has no datasets !")


        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } )

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion,
                                                                               tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles,
                                  'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay,
                                  'CMSSW' : streamConfig.Repack.CMSSWVersion,
                                  'SCRAM_ARCH' : streamConfig.Repack.ScramArch }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'ARCHIVAL_NODE' : None,
                                        'TAPE_NODE' : None,
                                        'DISK_NODE' : expressPhEDExSubscribeNode } )

            subscriptions.append( { 'custodialSites' : [],
                                    'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                    'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                    'priority' : "high",
                                    'primaryDataset' : specialDataset } )

            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    numPromptCalibProd = 0
                    for producer in streamConfig.Express.AlcaSkims:
                        if producer.startswith("PromptCalibProd"):
                            numPromptCalibProd += 1

                    if numPromptCalibProd > 0:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream,
                                                   'NUM_PRODUCER' : numPromptCalibProd }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } )

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion,
                                                                                tier0Config.Global.DefaultScramArch)
            
            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:

                bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } )

                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion,
                                                                                        tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_RATE' : streamConfig.Express.MaxInputRate,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval,
                                   'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay,
                                   'CMSSW' : streamConfig.Express.CMSSWVersion,
                                   'SCRAM_ARCH' : streamConfig.Express.ScramArch,
                                   'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion,
                                   'RECO_SCRAM_ARCH' : streamConfig.Express.RecoScramArch,
                                   'MULTICORE' : streamConfig.Express.Multicore,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue
                if stream == "A" and run in [ 216120, 216125, 216130 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : datasetConfig.ArchivalNode,
                                            'TAPE_NODE' : datasetConfig.TapeNode,
                                            'DISK_NODE' : datasetConfig.DiskNode } )

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } )
                    custodialSites.append(datasetConfig.TapeNode)
                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } )
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "RAW" } )

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : "%s-Error" % dataset,
                                            'dataTier' : "RAW" } )


            elif streamConfig.ProcessingStyle == "Express":

                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:

                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : None,
                                            'TAPE_NODE' : None,
                                            'DISK_NODE' : expressPhEDExSubscribeNode } )

                subscriptions.append( { 'custodialSites' : [],
                                        'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                        'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                        'priority' : "high",
                                        'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                            runInfo['bulk_data_type'])
            else:
                specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"

            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:

            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                              softTimeout = 604800, gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id],  conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError("Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass
    return
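A minimal usage sketch for the example above (all values below are hypothetical placeholders, not taken from the original code): the Tier0Feeder component would call configureRunStream() once for every newly discovered run/stream pair, with tier0Config assumed to be an already-loaded Tier-0 configuration object.

# Hypothetical driver loop for configureRunStream(); the run number, stream
# names and filesystem paths are illustrative placeholders only.
newRunStreams = {312345: ["Express", "PhysicsHadronsTaus"]}
specDirectory = "/data/tier0/admin/Specs"
dqmUploadProxy = "/data/certs/serviceproxy-vocms001.pem"

for run, streams in newRunStreams.items():
    for stream in streams:
        configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy)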
Exemplo n.º 34
0
        myFile.create()
        inputFileset.addFile(myFile)

        dbsFile = DBSBufferFile(lfn = dbsResult["LogicalFileName"], size = dbsResult["FileSize"],
                                events = dbsResult["NumberOfEvents"], checksums = {"cksum": dbsResult["Checksum"]},
                                locations = "cmssrm.fnal.gov", status = "LOCAL")
        dbsFile.setDatasetPath(datasetPath)
        dbsFile.setAlgorithm(appName = "cmsRun", appVer = "Unknown", appFam = "Unknown",
                             psetHash = "Unknown", configContent = "Unknown")
        dbsFile.create()

    inputFileset.commit()
    inputFileset.markOpen(False)
    return

myThread = threading.currentThread()
myThread.transaction.begin()
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name = workloadTask.getPathName())
    inputFileset.create()

    inputDataset = workloadTask.inputDataset()
    inputDatasetPath = "/%s/%s/%s" % (inputDataset.primary,
                                      inputDataset.processed,
                                      inputDataset.tier)
    injectFilesFromDBS(inputFileset, inputDatasetPath)
    myWMBSHelper = WMBSHelper(workload, workloadTask.getPathName(), cachepath=os.getcwd())
    myWMBSHelper.createSubscription(workloadTask, inputFileset)

myThread.transaction.commit()
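The opening of injectFilesFromDBS() is cut off in this example, so the construction of myFile is not shown. Purely as an illustration (not the missing code), a WMBS File could be built from the same DBS record fields the snippet already uses for DBSBufferFile:

# Hypothetical sketch only: dbsResult is assumed to be one file record from a
# DBS query, with the same fields used for the DBSBufferFile above.
from WMCore.WMBS.File import File

myFile = File(lfn=dbsResult["LogicalFileName"],
              size=dbsResult["FileSize"],
              events=dbsResult["NumberOfEvents"],
              checksums={"cksum": dbsResult["Checksum"]},
              locations=set(["cmssrm.fnal.gov"]))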
Exemplo n.º 35
0
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = getTestArguments()
        arguments["ProcConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'lheproduction')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")                

        return
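The load-and-assert pattern above is repeated many times in this test and in the ones that follow; a possible refactoring, sketched here with a helper name of our own choosing (not part of the original test suite), would be:

# Hypothetical helper bundling the recurring Fileset/Workflow/Subscription checks.
def assertSubscription(testCase, filesetName, taskPath, subType, splitAlgo):
    fileset = Fileset(name=filesetName)
    fileset.loadData()
    workflow = Workflow(name="TestWorkload", task=taskPath)
    workflow.load()
    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.loadData()
    testCase.assertEqual(subscription["type"], subType,
                         "Error: Wrong subscription type: %s" % subscription["type"])
    testCase.assertEqual(subscription["split_algo"], splitAlgo,
                         "Error: Wrong split algo: %s" % subscription["split_algo"])

# Example call, equivalent to the LogCollect checks above:
# assertSubscription(self, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
#                    "/TestWorkload/MonteCarloFromGEN/LogCollect",
#                    "LogCollect", "MinFileBased")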
Exemplo n.º 36
0
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll only test the skims here.
        """
        skimConfig = self.injectSkimConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "outputRECORECO",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()
            
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "EndOfRun",
                             "Error: Wrong split algo.")

        return
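With the hypothetical assertSubscription() helper sketched after Exemplo n.º 35, the skim subscription check above could be written more compactly, for example:

# Hypothetical, assuming the assertSubscription() helper defined earlier:
assertSubscription(self,
                   "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/merged-Merged",
                   "/TestWorkload/DataProcessing/DataProcessingMergeoutputRECORECO/SomeSkim",
                   "Skim", "FileBased")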
Exemplo n.º 37
0
def releasePromptReco(tier0Config, specDirectory, dqmUploadProxy):
    """
    _releasePromptReco_

    Called by Tier0Feeder

    Finds all run/primds that need to be released for PromptReco
    ( run.end_time + reco_release_config.delay < now
      AND run.end_time > 0 )

    Create workflows and subscriptions for the processing
    of runs/datasets.

    """
    logging.debug("releasePromptReco()")
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    insertDatasetScenarioDAO = daoFactory(
        classname="RunConfig.InsertDatasetScenario")
    insertCMSSWVersionDAO = daoFactory(
        classname="RunConfig.InsertCMSSWVersion")
    insertRecoConfigDAO = daoFactory(classname="RunConfig.InsertRecoConfig")
    insertStorageNodeDAO = daoFactory(classname="RunConfig.InsertStorageNode")
    insertPhEDExConfigDAO = daoFactory(
        classname="RunConfig.InsertPhEDExConfig")
    releasePromptRecoDAO = daoFactory(classname="RunConfig.ReleasePromptReco")
    insertWorkflowMonitoringDAO = daoFactory(
        classname="RunConfig.InsertWorkflowMonitoring")

    bindsDatasetScenario = []
    bindsCMSSWVersion = []
    bindsRecoConfig = []
    bindsStorageNode = []
    bindsReleasePromptReco = []

    # mark workflows as injected
    wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)
    markWorkflowsInjectedDAO = wmbsDaoFactory(
        classname="Workflow.MarkInjectedWorkflows")

    #
    # for creating PromptReco specs
    #
    recoSpecs = {}

    #
    # for PhEDEx subscription settings
    #
    subscriptions = []

    findRecoReleaseDAO = daoFactory(classname="RunConfig.FindRecoRelease")
    recoRelease = findRecoReleaseDAO.execute(transaction=False)

    for run in sorted(recoRelease.keys()):

        # retrieve some basic run information
        getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
        runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

        # retrieve phedex configs for run
        getPhEDExConfigDAO = daoFactory(classname="RunConfig.GetPhEDExConfig")
        phedexConfigs = getPhEDExConfigDAO.execute(run, transaction=False)

        for (dataset, fileset, repackProcVer) in recoRelease[run]:

            bindsReleasePromptReco.append({
                'RUN': run,
                'PRIMDS': dataset,
                'NOW': int(time.time())
            })

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': dataset,
                'SCENARIO': datasetConfig.Scenario
            })

            if datasetConfig.CMSSWVersion != None:
                bindsCMSSWVersion.append(
                    {'VERSION': datasetConfig.CMSSWVersion})

            alcaSkim = None
            if len(datasetConfig.AlcaSkims) > 0:
                alcaSkim = ",".join(datasetConfig.AlcaSkims)

            dqmSeq = None
            if len(datasetConfig.DqmSequences) > 0:
                dqmSeq = ",".join(datasetConfig.DqmSequences)

            datasetConfig.ScramArch = tier0Config.Global.ScramArches.get(
                datasetConfig.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRecoConfig.append({
                'RUN': run,
                'PRIMDS': dataset,
                'DO_RECO': int(datasetConfig.DoReco),
                'RECO_SPLIT': datasetConfig.RecoSplit,
                'WRITE_RECO': int(datasetConfig.WriteRECO),
                'WRITE_DQM': int(datasetConfig.WriteDQM),
                'WRITE_AOD': int(datasetConfig.WriteAOD),
                'PROC_VER': datasetConfig.ProcessingVersion,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq,
                'BLOCK_DELAY': datasetConfig.BlockCloseDelay,
                'CMSSW': datasetConfig.CMSSWVersion,
                'SCRAM_ARCH': datasetConfig.ScramArch,
                'MULTICORE': datasetConfig.Multicore,
                'GLOBAL_TAG': datasetConfig.GlobalTag
            })

            phedexConfig = phedexConfigs[dataset]

            if datasetConfig.WriteAOD:

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if phedexConfig['tape_node'] != None:
                    custodialSites.append(phedexConfig['tape_node'])
                if phedexConfig['disk_node'] != None:
                    nonCustodialSites.append(phedexConfig['disk_node'])
                    autoApproveSites.append(phedexConfig['disk_node'])

                subscriptions.append({
                    'custodialSites': custodialSites,
                    'custodialSubType': "Replica",
                    'nonCustodialSites': nonCustodialSites,
                    'autoApproveSites': autoApproveSites,
                    'priority': "high",
                    'primaryDataset': dataset,
                    'dataTier': "AOD"
                })

            if len(datasetConfig.AlcaSkims) > 0:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "ALCARECO"
                    })
            if datasetConfig.WriteDQM:
                if phedexConfig['tape_node'] != None:
                    subscriptions.append({
                        'custodialSites': [phedexConfig['tape_node']],
                        'custodialSubType': "Replica",
                        'nonCustodialSites': [],
                        'autoApproveSites': [],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': tier0Config.Global.DQMDataTier
                    })

            if datasetConfig.WriteRECO:
                if phedexConfig['disk_node'] != None:
                    subscriptions.append({
                        'custodialSites': [],
                        'nonCustodialSites': [phedexConfig['disk_node']],
                        'autoApproveSites': [phedexConfig['disk_node']],
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RECO"
                    })

            writeTiers = []
            if datasetConfig.WriteRECO:
                writeTiers.append("RECO")
            if datasetConfig.WriteAOD:
                writeTiers.append("AOD")
            if datasetConfig.WriteDQM:
                writeTiers.append(tier0Config.Global.DQMDataTier)
            if len(datasetConfig.AlcaSkims) > 0:
                writeTiers.append("ALCARECO")

            if datasetConfig.DoReco and len(writeTiers) > 0:

                #
                # create WMSpec
                #
                taskName = "Reco"
                workflowName = "PromptReco_Run%d_%s" % (run, dataset)

                specArguments = {}

                specArguments['TimePerEvent'] = 12
                specArguments['SizePerEvent'] = 512
                specArguments['Memory'] = 1800

                if datasetConfig.Multicore:
                    specArguments['Multicore'] = datasetConfig.Multicore
                    specArguments['Memory'] = 1800 * datasetConfig.Multicore

                specArguments['RequestPriority'] = 0

                specArguments['AcquisitionEra'] = runInfo['acq_era']
                specArguments['CMSSWVersion'] = datasetConfig.CMSSWVersion
                specArguments['ScramArch'] = datasetConfig.ScramArch

                specArguments['RunNumber'] = run

                specArguments['SplittingAlgo'] = "EventBased"
                specArguments['EventsPerJob'] = datasetConfig.RecoSplit

                specArguments['ProcessingString'] = "PromptReco"
                specArguments['ProcessingVersion'] = datasetConfig.ProcessingVersion
                specArguments['Scenario'] = datasetConfig.Scenario

                specArguments['GlobalTag'] = datasetConfig.GlobalTag
                specArguments['GlobalTagConnect'] = datasetConfig.GlobalTagConnect

                specArguments['InputDataset'] = "/%s/%s-%s/RAW" % (
                    dataset, runInfo['acq_era'], repackProcVer)

                specArguments['WriteTiers'] = writeTiers
                specArguments['AlcaSkims'] = datasetConfig.AlcaSkims
                specArguments['DqmSequences'] = datasetConfig.DqmSequences

                specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
                if runInfo['backfill']:
                    specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                                runInfo['bulk_data_type'])
                else:
                    specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

                specArguments['ValidStatus'] = "VALID"

                specArguments['EnableHarvesting'] = "True"
                specArguments['DQMUploadProxy'] = dqmUploadProxy
                specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']

                specArguments['BlockCloseDelay'] = datasetConfig.BlockCloseDelay

                specArguments['SiteWhitelist'] = datasetConfig.SiteWhitelist
                specArguments['SiteBlacklist'] = []
                specArguments['TrustSitelists'] = "True"

                factory = PromptRecoWorkloadFactory()
                wmSpec = factory.factoryWorkloadConstruction(
                    workflowName, specArguments)

                wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
                for subscription in subscriptions:
                    wmSpec.setSubscriptionInformation(**subscription)

                wmSpec.setOwnerDetails(
                    "*****@*****.**", "T0", {
                        'vogroup': 'DEFAULT',
                        'vorole': 'DEFAULT',
                        'dn': "*****@*****.**"
                    })

                wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                                  maxVSize=10485760,
                                                  softTimeout=604800,
                                                  gracePeriod=3600)

                wmbsHelper = WMBSHelper(wmSpec,
                                        taskName,
                                        cachepath=specDirectory)

                recoSpecs[workflowName] = (wmbsHelper, wmSpec, fileset)

    try:
        myThread.transaction.begin()
        if len(bindsDatasetScenario) > 0:
            insertDatasetScenarioDAO.execute(bindsDatasetScenario,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
        if len(bindsCMSSWVersion) > 0:
            insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                          conn=myThread.transaction.conn,
                                          transaction=True)
        if len(bindsRecoConfig) > 0:
            insertRecoConfigDAO.execute(bindsRecoConfig,
                                        conn=myThread.transaction.conn,
                                        transaction=True)
        if len(bindsStorageNode) > 0:
            insertStorageNodeDAO.execute(bindsStorageNode,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        if len(bindsReleasePromptReco) > 0:
            releasePromptRecoDAO.execute(bindsReleasePromptReco,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
        for (wmbsHelper, wmSpec, fileset) in recoSpecs.values():
            wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                          Fileset(id=fileset),
                                          alternativeFilesetClose=True)
            insertWorkflowMonitoringDAO.execute([fileset],
                                                conn=myThread.transaction.conn,
                                                transaction=True)
        if len(recoSpecs) > 0:
            markWorkflowsInjectedDAO.execute(recoSpecs.keys(),
                                             injected=True,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise RuntimeError(
            "Problem in releasePromptReco() database transaction!")
    else:
        myThread.transaction.commit()

    return
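
The function above repeats a pattern that shows up across these examples: assemble a specArguments dictionary, turn it into a workload through a StdSpec factory, and register the result with WMBSHelper so the top-level fileset and subscription exist in WMBS. The sketch below distills that pattern into a standalone helper; the factory and helper classes are the ones used above, the import paths follow WMCore's usual layout, and the task name and cache path arguments are placeholders. A configured WMBS database is assumed.

# Minimal sketch of the spec-creation/injection pattern used above.
# Assumes a working WMBS database; import paths mirror WMCore's layout.
from WMCore.WorkQueue.WMBSHelper import WMBSHelper
from WMCore.WMSpec.StdSpecs.PromptReco import PromptRecoWorkloadFactory


def injectPromptRecoSpec(workflowName, specArguments, taskName, specDirectory):
    """Build a PromptReco workload and create its WMBS fileset/subscription."""
    factory = PromptRecoWorkloadFactory()
    wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)

    wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)
    wmbsHelper.createTopLevelFileset()
    wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                  wmbsHelper.topLevelFileset)
    return wmSpec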
Exemplo n.º 38
0
    def testStoreResults(self):
        """
        _testStoreResults_

        Create a StoreResults workflow and verify it installs into WMBS
        correctly.
        """
        arguments = getTestArguments()
        arguments.update({'CmsPath': "/uscmst1/prod/sw/cms"})
        testWorkload = storeResultsWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "StoreResults", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        testWorkflow = Workflow(name="TestWorkload",
                                task="/TestWorkload/StoreResults")
        testWorkflow.load()

        self.assertEqual(len(testWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["Merged"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = testWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = testWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/StoreResults/merged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s." %
                unmergedOutput.name)

        logArchOutput = testWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = testWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/StoreResults/merged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-StoreResults-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=testWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"],
                         "ParentlessMergeBySize", "Error: Wrong split algo.")

        return
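
The StoreResults checks above all read Workflow.outputMap directly: each output module name maps to a list of destinations, and every destination carries a merged_output_fileset and an output_fileset entry. The helper below is a sketch of that lookup, assuming the workflow has already been created in WMBS; the imports follow WMCore's module layout.

# Sketch: load the merged and unmerged filesets recorded for one output module.
# Assumes the workflow already exists in WMBS.
from WMCore.WMBS.Workflow import Workflow


def loadOutputFilesets(workflowName, taskPath, outputModule):
    workflow = Workflow(name=workflowName, task=taskPath)
    workflow.load()
    destination = workflow.outputMap[outputModule][0]
    merged = destination["merged_output_fileset"]
    unmerged = destination["output_fileset"]
    merged.loadData()
    unmerged.loadData()
    return merged.name, unmerged.name

# Mirroring the assertions above:
# loadOutputFilesets("TestWorkload", "/TestWorkload/StoreResults", "Merged")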
Exemplo n.º 40
0
    def testReReco(self):
        """
        _testReReco_

        Verify that ReReco workflows can be created and inserted into WMBS
        correctly.  The ReReco workflow is just a DataProcessing workflow with
        skims tacked on.  We'll test the skims and DQMHarvest here.
        """
        skimConfig = self.injectSkimConfig()
        recoConfig = self.injectReRecoConfig()
        dataProcArguments = getTestArguments()
        dataProcArguments['ProcessingString']  = 'ProcString'
        dataProcArguments['ProcConfigCacheID'] = recoConfig
        dataProcArguments["SkimConfigs"] = [{"SkimName": "SomeSkim",
                                             "SkimInput": "RECOoutput",
                                             "SkimSplitAlgo": "FileBased",
                                             "SkimSplitArgs": {"files_per_job": 1,
                                                               "include_parents": True},
                                             "ConfigCacheID": skimConfig,
                                             "Scenario": None}]
        dataProcArguments["CouchURL"] = os.environ["COUCHURL"]
        dataProcArguments["CouchDBName"] = "rereco_t"

        testWorkload = rerecoWorkload("TestWorkload", dataProcArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        self.assertEqual(testWorkload.data.tasks.DataProcessing.tree.children.DataProcessingMergeRECOoutput.\
                         tree.children.SomeSkim.tree.children.SomeSkimMergeSkimB.steps.cmsRun1.output.modules.\
                         Merged.mergedLFNBase,
                         '/store/data/WMAgentCommissioning10/MinimumBias/USER/SkimBFilter-ProcString-v2')
        
        testWMBSHelper = WMBSHelper(testWorkload, "DataProcessing", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        skimWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
        skimWorkflow.load()

        self.assertEqual(len(skimWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = ["SkimA", "SkimB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = skimWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)

        logArchOutput = skimWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = skimWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")            

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged")
        topLevelFileset.loadData()

        skimSubscription = Subscription(fileset = topLevelFileset, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            mergeWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s" % skimOutput)
            mergeWorkflow.load()
            mergeSubscription = Subscription(fileset = unmerged, workflow = mergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            unmerged = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/unmerged-Skim%s" % skimOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimCleanupUnmergedSkim%s" % skimOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

        for skimOutput in ["A", "B"]:
            skimMergeLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/merged-logArchive" % skimOutput)
            skimMergeLogCollect.loadData()
            skimMergeLogCollectWorkflow = Workflow(name = "TestWorkload",
                                                   task = "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim/SomeSkimMergeSkim%s/SomeSkimSkim%sMergeLogCollect" % (skimOutput, skimOutput))
            skimMergeLogCollectWorkflow.load()
            logCollectSub = Subscription(fileset = skimMergeLogCollect, workflow = skimMergeLogCollectWorkflow)
            logCollectSub.loadData()
            
            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algo.")

        dqmWorkflow = Workflow(name = "TestWorkload",
                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged")
        dqmWorkflow.load()

        topLevelFileset = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/merged-Merged")
        topLevelFileset.loadData()

        dqmSubscription = Subscription(fileset = topLevelFileset, workflow = dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/unmerged-logArchive",
                     "Error: LogArchive output fileset is wrong.")

        dqmHarvestLogCollect = Fileset(name = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/unmerged-logArchive")
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(name = "TestWorkload",
                                               task = "/TestWorkload/DataProcessing/DataProcessingMergeDQMoutput/DataProcessingMergeDQMoutputDQMHarvestMerged/DataProcessingMergeDQMoutputMergedDQMHarvestLogCollect")
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset = dqmHarvestLogCollect, workflow = dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
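
The ReReco example above repeats one verification idiom many times: load a fileset by name, load the workflow for a task path, join them through Subscription, and compare the subscription's type and split algorithm. A compact sketch of that idiom follows; it assumes the fileset, workflow and subscription already exist in WMBS.

# Sketch: summarise the subscription joining a named fileset to a task's workflow.
# Assumes the fileset, workflow and subscription were already created in WMBS.
from WMCore.WMBS.Fileset import Fileset
from WMCore.WMBS.Subscription import Subscription
from WMCore.WMBS.Workflow import Workflow


def subscriptionSummary(filesetName, workflowName, taskPath):
    fileset = Fileset(name=filesetName)
    fileset.loadData()
    workflow = Workflow(name=workflowName, task=taskPath)
    workflow.load()
    subscription = Subscription(fileset=fileset, workflow=workflow)
    subscription.loadData()
    return subscription["type"], subscription["split_algo"]

# According to the checks above,
# subscriptionSummary("/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/merged-Merged",
#                     "TestWorkload",
#                     "/TestWorkload/DataProcessing/DataProcessingMergeRECOoutput/SomeSkim")
# would return ("Skim", "FileBased").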
Exemplo n.º 41
0
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName = 'site1', seName = 'goodse.cern.ch',
                                   ceName = 'site1', plugin = "TestPlugin")
        resourceControl.insertSite(siteName = 'site2', seName = 'goodse2.cern.ch',
                                   ceName = 'site2', plugin = "TestPlugin")        

        testWorkload = self.createTestWMSpec()
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock", cachepath = self.workDir)
        testWMBSHelper.createSubscription()

        testWorkload.truncate("ResubmitTestWorkload", "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")
        testResubmitWMBSHelper = WMBSHelper(testWorkload, "SomeBlock2", cachepath = self.workDir)
        testResubmitWMBSHelper.createSubscription()

        mergeWorkflow = Workflow(name = "ResubmitTestWorkload",
                                 task = "/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(mergeWorkflow.spec, os.path.join(self.workDir, mergeWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name = "ResubmitTestWorkload",
                                 task = "/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(cleanupWorkflow.spec, os.path.join(self.workDir, cleanupWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")        

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name, "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(name = "ResubmitTestWorkload",
                                task = "/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(skimWorkflow.spec, os.path.join(self.workDir, skimWorkflow.name,
                                                          "WMSandbox", "WMWorkload.pkl"),
                         "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0]["output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0]["output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(mergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Merged output fileset is wrong: %s" % mergedSkimOutputA.name)
        self.assertEqual(unmergedSkimOutputA.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
                         "Error: Unmerged output fileset is wrong.")
        self.assertEqual(mergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Merged output fileset is wrong.")
        self.assertEqual(unmergedSkimOutputB.name, "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
                         "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset = topLevelFileset, workflow = mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")        

        skimSubscription = Subscription(fileset = unmergedMergeOutput, workflow = skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
Exemplo n.º 42
0
    def testPrivateMC(self):
        """
        _testPrivateMC_
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "privatemc_t"
        defaultArguments["AnalysisConfigCacheDoc"] = self.injectAnalysisConfig()
        defaultArguments["ProcessingVersion"] = 1

        processingFactory = PrivateMCWorkloadFactory()
        testWorkload = processingFactory("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "PrivateMC", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)
        procWorkflow = Workflow(name = "TestWorkload",
                              task = "/TestWorkload/PrivateMC")
        procWorkflow.load()
        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                                  "Error: Wrong number of WF outputs: %s" % len(procWorkflow.outputMap.keys()))

        # Analysis/PrivateMC has no merge task, so merged and unmerged filesets coincide
        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()
        self.assertEqual(logArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/PrivateMC/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["OutputA", "OutputB"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/PrivateMC/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(name = "TestWorkload-PrivateMC-SomeBlock")
        topLevelFileset.loadData()
        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()
        self.assertEqual(procSubscription["type"], "PrivateMC",
                         "Error: Wrong subscription type.")
        self.assertEqual(procSubscription["split_algo"], "EventBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/PrivateMC/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/PrivateMC/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()
        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")
        myRun = Run(runNumber = dbsResult["LumiList"][0]["RunNumber"])
        for lumi in dbsResult["LumiList"]:
            myRun.lumis.append(lumi["LumiSectionNumber"])
        myFile.addRun(myRun)
        myFile.create()
        inputFileset.addFile(myFile)

    if len(inputFileset) < 1:
        raise Exception("No files were selected!")

    inputFileset.commit()
    inputFileset.markOpen(False)
    return

myThread = threading.currentThread()
myThread.transaction.begin()
for workloadTask in workload.taskIterator():
    inputFileset = Fileset(name = workloadTask.getPathName())
    inputFileset.create()

    inputDataset = workloadTask.inputDataset()
    inputDatasetPath = "/%s/%s/%s" % (inputDataset.primary,
                                      inputDataset.processed,
                                      inputDataset.tier)
    injectFilesFromDBS(inputFileset, inputDatasetPath, options.RunWhitelist)

    myWMBSHelper = WMBSHelper(workload)
    myWMBSHelper.createSubscription(workloadTask.getPathName())

myThread.transaction.commit()
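
Only the tail of injectFilesFromDBS survives in the example above: the loop that wraps each DBS result in a WMBS file with its run and lumi numbers and adds it to the input fileset. The sketch below isolates that per-file step; the File constructor arguments are illustrative assumptions, since the part of the function that derives them from the DBS query is not shown here.

# Sketch: attach one file with its run/lumi information to an open WMBS fileset.
# The lfn/size/events values are placeholders; in the full script they come
# from the DBS query results handled by injectFilesFromDBS().
from WMCore.DataStructs.Run import Run
from WMCore.WMBS.File import File


def addFileToFileset(inputFileset, lfn, size, events, runNumber, lumis):
    myFile = File(lfn=lfn, size=size, events=events)
    myRun = Run(runNumber=runNumber)
    for lumi in lumis:
        myRun.lumis.append(lumi)
    myFile.addRun(myRun)
    myFile.create()
    inputFileset.addFile(myFile)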
Exemplo n.º 44
0
    def testTruncatedWFInsertion(self):
        """
        _testTruncatedWFInsertion_

        """
        resourceControl = ResourceControl()
        resourceControl.insertSite(siteName='site1',
                                   seName='goodse.cern.ch',
                                   ceName='site1',
                                   plugin="TestPlugin")
        resourceControl.insertSite(siteName='site2',
                                   seName='goodse2.cern.ch',
                                   ceName='site2',
                                   plugin="TestPlugin")

        testWorkload = self.createTestWMSpec()
        testTopLevelTask = getFirstTask(testWorkload)
        testWMBSHelper = WMBSHelper(testWorkload,
                                    testTopLevelTask.name(),
                                    "SomeBlock",
                                    cachepath=self.workDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testTopLevelTask,
                                          testWMBSHelper.topLevelFileset)

        testWorkload.truncate("ResubmitTestWorkload",
                              "/TestWorkload/ProcessingTask/MergeTask",
                              "someserver", "somedatabase")

        # create the subscriptions for the multiple top-level tasks (MergeTask and CleanupTask for the same block)
        for task in testWorkload.getTopLevelTask():
            testResubmitWMBSHelper = WMBSHelper(testWorkload,
                                                task.name(),
                                                "SomeBlock2",
                                                cachepath=self.workDir)
            testResubmitWMBSHelper.createTopLevelFileset()
            testResubmitWMBSHelper.createSubscription(
                task, testResubmitWMBSHelper.topLevelFileset)

        mergeWorkflow = Workflow(name="ResubmitTestWorkload",
                                 task="/ResubmitTestWorkload/MergeTask")
        mergeWorkflow.load()

        self.assertEqual(mergeWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            mergeWorkflow.spec,
            os.path.join(self.workDir, mergeWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(mergeWorkflow.outputMap.keys()), 1,
                         "Error: Wrong number of WF outputs.")

        cleanupWorkflow = Workflow(name="ResubmitTestWorkload",
                                   task="/ResubmitTestWorkload/CleanupTask")
        cleanupWorkflow.load()

        self.assertEqual(cleanupWorkflow.owner, "sfoulkes",
                         "Error: Wrong owner.")
        self.assertEqual(
            cleanupWorkflow.spec,
            os.path.join(self.workDir, cleanupWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(cleanupWorkflow.outputMap.keys()), 0,
                         "Error: Wrong number of WF outputs.")

        unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
            "output_fileset"]
        unmergedMergeOutput.loadData()

        self.assertEqual(unmergedMergeOutput.name,
                         "/ResubmitTestWorkload/MergeTask/merged-Merged",
                         "Error: Unmerged output fileset is wrong.")

        skimWorkflow = Workflow(
            name="ResubmitTestWorkload",
            task="/ResubmitTestWorkload/MergeTask/SkimTask")
        skimWorkflow.load()

        self.assertEqual(skimWorkflow.owner, "sfoulkes", "Error: Wrong owner.")
        self.assertEqual(
            skimWorkflow.spec,
            os.path.join(self.workDir, skimWorkflow.name, "WMSandbox",
                         "WMWorkload.pkl"), "Error: Wrong spec URL")
        self.assertEqual(len(skimWorkflow.outputMap.keys()), 2,
                         "Error: Wrong number of WF outputs.")

        mergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "merged_output_fileset"]
        unmergedSkimOutputA = skimWorkflow.outputMap["SkimOutputA"][0][
            "output_fileset"]
        mergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "merged_output_fileset"]
        unmergedSkimOutputB = skimWorkflow.outputMap["SkimOutputB"][0][
            "output_fileset"]

        mergedSkimOutputA.loadData()
        mergedSkimOutputB.loadData()
        unmergedSkimOutputA.loadData()
        unmergedSkimOutputB.loadData()

        self.assertEqual(
            mergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Merged output fileset is wrong: %s" %
            mergedSkimOutputA.name)
        self.assertEqual(
            unmergedSkimOutputA.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputA",
            "Error: Unmerged output fileset is wrong.")
        self.assertEqual(
            mergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Merged output fileset is wrong.")
        self.assertEqual(
            unmergedSkimOutputB.name,
            "/ResubmitTestWorkload/MergeTask/SkimTask/unmerged-SkimOutputB",
            "Error: Unmerged output fileset is wrong.")

        topLevelFileset = Fileset(
            name="ResubmitTestWorkload-MergeTask-SomeBlock2")
        topLevelFileset.loadData()

        mergeSubscription = Subscription(fileset=topLevelFileset,
                                         workflow=mergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(len(mergeSubscription.getWhiteBlackList()), 0,
                         "Error: Wrong white/black list for merge sub.")

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "WMBSMergeBySize",
                         "Error: Wrong split algo.")

        skimSubscription = Subscription(fileset=unmergedMergeOutput,
                                        workflow=skimWorkflow)
        skimSubscription.loadData()

        self.assertEqual(skimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(skimSubscription["split_algo"], "FileBased",
                         "Error: Wrong split algo.")

        return
Exemplo n.º 45
0
    def testDependentReDigi(self):
        """
        _testDependentReDigi_

        Verify that a dependent ReDigi workflow that keeps and stages out
        RAW data is created and installed into WMBS correctly.
        """
        defaultArguments = getTestArguments()
        defaultArguments["CouchURL"] = os.environ["COUCHURL"]
        defaultArguments["CouchDBName"] = "redigi_t"
        configs = self.injectReDigiConfigs()
        defaultArguments["StepOneConfigCacheID"] = configs[0]
        defaultArguments["StepTwoConfigCacheID"] = configs[1]
        defaultArguments["StepThreeConfigCacheID"] = configs[2]

        testWorkload = reDigiWorkload("TestWorkload", defaultArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DWMWM")
        
        testWMBSHelper = WMBSHelper(testWorkload, "SomeBlock")
        testWMBSHelper.createSubscription()

        topLevelFileset = Fileset(name = "TestWorkload-StepOneProc-SomeBlock")
        topLevelFileset.loadData()

        stepOneUnmergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-RAWDEBUGoutput")
        stepOneUnmergedRAWFileset.loadData()
        stepOneMergedRAWFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-Merged")
        stepOneMergedRAWFileset.loadData()
        stepOneLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/unmerged-logArchive")
        stepOneLogArchiveFileset.loadData()
        stepOneMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/merged-logArchive")
        stepOneMergeLogArchiveFileset.loadData()

        stepTwoUnmergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-DQMoutput")
        stepTwoUnmergedDQMFileset.loadData()
        stepTwoUnmergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-RECODEBUGoutput")
        stepTwoUnmergedRECOFileset.loadData()
        stepTwoMergedDQMFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-Merged")
        stepTwoMergedDQMFileset.loadData()
        stepTwoMergedRECOFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-Merged")
        stepTwoMergedRECOFileset.loadData()
        stepTwoLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/unmerged-logArchive")
        stepTwoLogArchiveFileset.loadData()
        stepTwoMergeDQMLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput/merged-logArchive")
        stepTwoMergeDQMLogArchiveFileset.loadData()
        stepTwoMergeRECOLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/merged-logArchive")
        stepTwoMergeRECOLogArchiveFileset.loadData()

        stepThreeUnmergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-aodOutputModule")
        stepThreeUnmergedAODFileset.loadData()
        stepThreeMergedAODFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-Merged")
        stepThreeMergedAODFileset.loadData()
        stepThreeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/unmerged-logArchive")
        stepThreeLogArchiveFileset.loadData()
        
        stepThreeMergeLogArchiveFileset = Fileset(name = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule/merged-logArchive")
        stepThreeMergeLogArchiveFileset.loadData()

        stepOneWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc")
        stepOneWorkflow.load()
        self.assertTrue("logArchive" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertTrue("RAWDEBUGoutput" in stepOneWorkflow.outputMap.keys(),
                        "Error: Step one missing output module.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")
        self.assertEqual(stepOneWorkflow.outputMap["RAWDEBUGoutput"][0]["output_fileset"].id, stepOneUnmergedRAWFileset.id,
                         "Error: RAWDEBUG output fileset is wrong.")
        
        for outputMod in stepOneWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepOneSub = Subscription(workflow = stepOneWorkflow, fileset = topLevelFileset)
        stepOneSub.loadData()
        self.assertEqual(stepOneSub["type"], "Processing",
                         "Error: Step one sub has wrong type.")

        stepOneCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcCleanupUnmergedRAWDEBUGoutput")
        stepOneCleanupWorkflow.load()
        self.assertEqual(len(stepOneCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepOneCleanupSub = Subscription(workflow = stepOneCleanupWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneCleanupSub.loadData()
        self.assertEqual(stepOneCleanupSub["type"], "Cleanup",
                         "Error: Step one sub has wrong type.")

        stepOneLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/LogCollect")
        stepOneLogCollectWorkflow.load()
        self.assertEqual(len(stepOneLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepOneLogCollectSub = Subscription(workflow = stepOneLogCollectWorkflow, fileset = stepOneLogArchiveFileset)
        stepOneLogCollectSub.loadData()
        self.assertEqual(stepOneLogCollectSub["type"], "LogCollect",
                         "Error: Step one sub has wrong type.")

        stepOneMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                        task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput")
        stepOneMergeWorkflow.load()
        self.assertTrue("Merged" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertTrue("logArchive" in stepOneMergeWorkflow.outputMap.keys(),
                        "Error: Step one merge missing output module.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepOneMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        self.assertEqual(stepOneMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepOneMergedRAWFileset.id,
                         "Error: RAWDEBUG merge output fileset is wrong.")
        for outputMod in stepOneMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepOneMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        stepOneMergeSub = Subscription(workflow = stepOneMergeWorkflow, fileset = stepOneUnmergedRAWFileset)
        stepOneMergeSub.loadData()
        self.assertEqual(stepOneMergeSub["type"], "Merge",
                         "Error: Step one sub has wrong type.")

        stepTwoWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                   task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc")
        stepTwoWorkflow.load()
        self.assertTrue("RECODEBUGoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertTrue("DQMoutput" in stepTwoWorkflow.outputMap.keys(),
                        "Error: Step two missing output module.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["RECODEBUGoutput"][0]["output_fileset"].id, stepTwoUnmergedRECOFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")
        self.assertEqual(stepTwoWorkflow.outputMap["DQMoutput"][0]["output_fileset"].id, stepTwoUnmergedDQMFileset.id,
                         "Error: DQM output fileset is wrong.")        
        stepTwoSub = Subscription(workflow = stepTwoWorkflow, fileset = stepOneMergedRAWFileset)
        stepTwoSub.loadData()
        self.assertEqual(stepTwoSub["type"], "Processing",
                         "Error: Step two sub has wrong type.")

        for outputMod in stepTwoWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoCleanupDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedDQMoutput")
        stepTwoCleanupDQMWorkflow.load()
        self.assertEqual(len(stepTwoCleanupDQMWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupDQMSub = Subscription(workflow = stepTwoCleanupDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoCleanupDQMSub.loadData()
        self.assertEqual(stepTwoCleanupDQMSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")        

        stepTwoCleanupRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                              task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcCleanupUnmergedRECODEBUGoutput")
        stepTwoCleanupRECOWorkflow.load()
        self.assertEqual(len(stepTwoCleanupRECOWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup shouldn't have any output.")
        stepTwoCleanupRECOSub = Subscription(workflow = stepTwoCleanupRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoCleanupRECOSub.loadData()
        self.assertEqual(stepTwoCleanupRECOSub["type"], "Cleanup",
                         "Error: Step two sub has wrong type.")                

        stepTwoLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                             task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcLogCollect")
        stepTwoLogCollectWorkflow.load()
        self.assertEqual(len(stepTwoLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect shouldn't have any output.")
        stepTwoLogCollectSub = Subscription(workflow = stepTwoLogCollectWorkflow, fileset = stepTwoLogArchiveFileset)
        stepTwoLogCollectSub.loadData()
        self.assertEqual(stepTwoLogCollectSub["type"], "LogCollect",
                         "Error: Step two sub has wrong type.")

        stepTwoMergeRECOWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput")
        stepTwoMergeRECOWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeRECOWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")        
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeRECOLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeRECOWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedRECOFileset.id,
                         "Error: RECODEBUG merge output fileset is wrong.")
        stepTwoMergeRECOSub = Subscription(workflow = stepTwoMergeRECOWorkflow, fileset = stepTwoUnmergedRECOFileset)
        stepTwoMergeRECOSub.loadData()
        self.assertEqual(stepTwoMergeRECOSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeRECOWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeRECOWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepTwoMergeDQMWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                           task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeDQMoutput")
        stepTwoMergeDQMWorkflow.load()
        self.assertTrue("Merged" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertTrue("logArchive" in stepTwoMergeDQMWorkflow.outputMap.keys(),
                        "Error: Step two merge missing output module.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepTwoMergeDQMLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        self.assertEqual(stepTwoMergeDQMWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepTwoMergedDQMFileset.id,
                         "Error: DQM merge output fileset is wrong.")
        stepTwoMergeDQMSub = Subscription(workflow = stepTwoMergeDQMWorkflow, fileset = stepTwoUnmergedDQMFileset)
        stepTwoMergeDQMSub.loadData()
        self.assertEqual(stepTwoMergeDQMSub["type"], "Merge",
                         "Error: Step two sub has wrong type.")
        for outputMod in stepTwoMergeDQMWorkflow.outputMap.keys():
            self.assertTrue(len(stepTwoMergeDQMWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")
        
        stepThreeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                     task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc")
        stepThreeWorkflow.load()
        self.assertTrue("aodOutputModule" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")
        self.assertTrue("logArchive" in stepThreeWorkflow.outputMap.keys(),
                        "Error: Step three missing output module.")        
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        self.assertEqual(stepThreeWorkflow.outputMap["aodOutputModule"][0]["output_fileset"].id, stepThreeUnmergedAODFileset.id,
                         "Error: RECODEBUG output fileset is wrong.")
        stepThreeSub = Subscription(workflow = stepThreeWorkflow, fileset = stepTwoMergedRECOFileset)
        stepThreeSub.loadData()
        self.assertEqual(stepThreeSub["type"], "Processing",
                         "Error: Step three sub has wrong type.")                
        for outputMod in stepThreeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        stepThreeCleanupWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                            task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcCleanupUnmergedaodOutputModule")
        stepThreeCleanupWorkflow.load()
        self.assertEqual(len(stepThreeCleanupWorkflow.outputMap.keys()), 0,
                         "Error: Cleanup should have no output.")
        stepThreeCleanupSub = Subscription(workflow = stepThreeCleanupWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeCleanupSub.loadData()
        self.assertEqual(stepThreeCleanupSub["type"], "Cleanup",
                         "Error: Step three sub has wrong type.")

        stepThreeLogCollectWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                               task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcLogCollect")
        stepThreeLogCollectWorkflow.load()
        self.assertEqual(len(stepThreeLogCollectWorkflow.outputMap.keys()), 0,
                         "Error: LogCollect should have no output.")
        stepThreeLogCollectSub = Subscription(workflow = stepThreeLogCollectWorkflow, fileset = stepThreeLogArchiveFileset)
        stepThreeLogCollectSub.loadData()
        self.assertEqual(stepThreeLogCollectSub["type"], "LogCollect",
                         "Error: Step three sub has wrong type.")

        stepThreeMergeWorkflow = Workflow(spec = "somespec", name = "TestWorkload",
                                          task = "/TestWorkload/StepOneProc/StepOneProcMergeRAWDEBUGoutput/StepTwoProc/StepTwoProcMergeRECODEBUGoutput/StepThreeProc/StepThreeProcMergeaodOutputModule")
        stepThreeMergeWorkflow.load()
        self.assertTrue("Merged" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")
        self.assertTrue("logArchive" in stepThreeMergeWorkflow.outputMap.keys(),
                        "Error: Step three merge missing output module.")                
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["logArchive"][0]["output_fileset"].id, stepThreeMergeLogArchiveFileset.id,
                         "Error: logArchive fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        self.assertEqual(stepThreeMergeWorkflow.outputMap["Merged"][0]["output_fileset"].id, stepThreeMergedAODFileset.id,
                         "Error: AOD merge output fileset is wrong.")
        stepThreeMergeSub = Subscription(workflow = stepThreeMergeWorkflow, fileset = stepThreeUnmergedAODFileset)
        stepThreeMergeSub.loadData()
        self.assertEqual(stepThreeMergeSub["type"], "Merge",
                         "Error: Step three sub has wrong type.")                        
        for outputMod in stepThreeMergeWorkflow.outputMap.keys():
            self.assertTrue(len(stepThreeMergeWorkflow.outputMap[outputMod]) == 1,
                            "Error: more than one destination for output mod.")

        return
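
    # The checks above repeat one pattern: load a Workflow, then verify that a
    # given output module points at the expected merged and unmerged filesets.
    # A minimal helper sketch of that pattern is shown below; it is not part of
    # the original test suite and the helper name is hypothetical, but it only
    # uses the WMCore.WMBS Workflow/Fileset calls already exercised in these
    # tests (outputMap, loadData, name).
    def assertOutputFilesets(self, workflow, outputMod, mergedName, unmergedName):
        """Assert that outputMod of workflow feeds the expected filesets."""
        mergedOutput = workflow.outputMap[outputMod][0]["merged_output_fileset"]
        unmergedOutput = workflow.outputMap[outputMod][0]["output_fileset"]
        mergedOutput.loadData()
        unmergedOutput.loadData()
        self.assertEqual(mergedOutput.name, mergedName,
                         "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
        self.assertEqual(unmergedOutput.name, unmergedName,
                         "Error: Unmerged output fileset is wrong: %s" % unmergedOutput.name)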
Exemplo n.º 46
0
    def testPromptRecoWithSkims(self):
        """
        _testPromptRecoWithSkims_

        Create a T1 Prompt Reconstruction workflow with PromptSkims
        and verify it installs into WMBS correctly.
        """
        self.setupPromptSkimConfigObject()
        testArguments = getTestArguments()
        testArguments["PromptSkims"] = [self.promptSkim]
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        testArguments["CouchDBName"] = "promptreco_t"
        testArguments["EnvPath"] = os.environ.get("EnvPath", None)
        testArguments["BinPath"] = os.environ.get("BinPath", None)

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()),
                         len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = [
            "write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                    goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        promptSkimWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1")
        promptSkimWorkflow.load()

        self.assertEqual(len(promptSkimWorkflow.outputMap.keys()), 6,
                         "Error: Wrong number of WF outputs.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = promptSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod,
                "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = promptSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" %
                                     goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s"
                % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-Merged"
                % goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            recoSubscription["split_algo"], "EventBased",
            "Error: Wrong split algorithm. %s" %
            recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedRecoFileset = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_RECO/merged-Merged")
        mergedRecoFileset.loadData()

        promptSkimSubscription = Subscription(fileset=mergedRecoFileset,
                                              workflow=promptSkimWorkflow)
        promptSkimSubscription.loadData()

        self.assertEqual(promptSkimSubscription["type"], "Skim",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            promptSkimSubscription["split_algo"], "FileBased",
            "Error: Wrong split algorithm. %s" %
            promptSkimSubscription["split_algo"])

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                       unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                             workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        unmergedOutputs = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for unmergedOutput in unmergedOutputs:
            unmergedPromptSkim = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % unmergedOutput)
            unmergedPromptSkim.loadData()
            promptSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s"
                % unmergedOutput)
            promptSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedPromptSkim,
                                             workflow=promptSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "ParentlessMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = [
            "write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                      goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-%s"
                % goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1CleanupUnmerged%s"
                % goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                cleanupSubscription["split_algo"], "SiblingProcessingBased",
                "Error: Wrong split algorithm. %s" %
                cleanupSubscription["split_algo"])

        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect,
                                     workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        promptSkimLogCollect = Fileset(
            name=
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/unmerged-logArchive"
        )
        promptSkimLogCollect.loadData()
        promptSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1LogCollect"
        )
        promptSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=promptSkimLogCollect,
                                     workflow=promptSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(
                name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" %
                (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=recoMergeLogCollect,
                workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=alcaSkimLogCollect,
                workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = [
            "fakeSkimOut1", "fakeSkimOut2", "fakeSkimOut3", "fakeSkimOut4",
            "fakeSkimOut5"
        ]
        for goldenOutputMod in goldenOutputMods:
            promptSkimMergeLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/merged-logArchive"
                % goldenOutputMod)
            promptSkimMergeLogCollect.loadData()
            promptSkimMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/RecoMergewrite_RECO/TestSkim1/TestSkim1Merge%s/TestSkim1%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            promptSkimMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=promptSkimMergeLogCollect,
                workflow=promptSkimMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        return
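
    # Subscription checks in this test all follow the same shape: build a
    # Subscription from a fileset and workflow, load it, then compare "type"
    # and "split_algo". A compact sketch of that pattern (hypothetical helper
    # name, same WMBS Subscription API as used above):
    def assertSubscription(self, fileset, workflow, subType, splitAlgo):
        """Assert that the (fileset, workflow) subscription has the expected type and splitting."""
        subscription = Subscription(fileset=fileset, workflow=workflow)
        subscription.loadData()
        self.assertEqual(subscription["type"], subType,
                         "Error: Wrong subscription type: %s" % subscription["type"])
        self.assertEqual(subscription["split_algo"], splitAlgo,
                         "Error: Wrong split algorithm: %s" % subscription["split_algo"])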
Exemplo n.º 47
0
        }

        factory = TaskChainWorkloadFactory()
        try:
            self.workload = factory("YankingTheChain", arguments)
        except Exception as ex:
            msg = "Error invoking TaskChainWorkloadFactory:\n%s" % str(ex)
            self.fail(msg)


        self.workload.setSpecUrl("somespec")
        self.workload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(self.workload, "DigiHLT", "SomeBlock", cachepath = self.testInit.testDir)
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)


        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT"), arguments['Task1'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"), arguments['Task2'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteALCA/ALCAReco"),
                        arguments['Task3'])
        self._checkTask(self.workload.getTaskByPath("/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco/RecoMergewriteRECO/Skims"),
                        arguments['Task4'])

        digi = self.workload.getTaskByPath("/YankingTheChain/DigiHLT")
        digiStep = digi.getStepHelper("cmsRun1")
        self.assertEqual(digiStep.getGlobalTag(), arguments['GlobalTag'])
        self.assertEqual(digiStep.getCMSSWVersion(), arguments['CMSSWVersion'])
        self.assertEqual(digiStep.getScramArch(), arguments['ScramArch'])
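
        # The nested task paths checked above follow one naming convention:
        # each processing task hangs off the merge task of its parent's output
        # module, i.e. "<parent path>/<parentTask>Merge<outputModule>/<childTask>".
        # A small sketch of that composition (plain string handling only; the
        # helper name is illustrative, not part of the original test):
        def chainedTaskPath(parentPath, outputModule, childTask):
            """Build the child task path used by getTaskByPath in this test."""
            parentTask = parentPath.split("/")[-1]
            return "%s/%sMerge%s/%s" % (parentPath, parentTask, outputModule, childTask)

        # e.g. chainedTaskPath("/YankingTheChain/DigiHLT", "writeRAWDIGI", "Reco")
        #      == "/YankingTheChain/DigiHLT/DigiHLTMergewriteRAWDIGI/Reco"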
 
Exemplo n.º 48
0
    def testPromptReco(self):
        """
        _testPromptReco_

        Create a Prompt Reconstruction workflow
        and verify it installs into WMBS correctly.
        """
        testArguments = getTestArguments()

        testWorkload = promptrecoWorkload("TestWorkload", testArguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "T0")

        testWMBSHelper = WMBSHelper(testWorkload, "Reco", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask,
                                          testWMBSHelper.topLevelFileset)

        recoWorkflow = Workflow(name="TestWorkload", task="/TestWorkload/Reco")
        recoWorkflow.load()
        self.assertEqual(len(recoWorkflow.outputMap.keys()),
                         len(testArguments["WriteTiers"]) + 1,
                         "Error: Wrong number of WF outputs in the Reco WF.")

        goldenOutputMods = [
            "write_RECO", "write_ALCARECO", "write_AOD", "write_DQM"
        ]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = recoWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()

            if goldenOutputMod != "write_ALCARECO":
                self.assertEqual(
                    mergedOutput.name,
                    "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                    goldenOutputMod,
                    "Error: Merged output fileset is wrong: %s" %
                    mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = recoWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        alcaSkimWorkflow = Workflow(name="TestWorkload",
                                    task="/TestWorkload/Reco/AlcaSkim")
        alcaSkimWorkflow.load()
        self.assertEqual(
            len(alcaSkimWorkflow.outputMap.keys()),
            len(testArguments["AlcaSkims"]) + 1,
            "Error: Wrong number of WF outputs in the AlcaSkim WF.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "merged_output_fileset"]
            unmergedOutput = alcaSkimWorkflow.outputMap[goldenOutputMod][0][
                "output_fileset"]
            mergedOutput.loadData()
            unmergedOutput.loadData()
            self.assertEqual(
                mergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong: %s" %
                mergedOutput.name)
            self.assertEqual(
                unmergedOutput.name,
                "/TestWorkload/Reco/AlcaSkim/unmerged-%s" % goldenOutputMod,
                "Error: Unmerged output fileset is wrong: %s" %
                unmergedOutput.name)

        logArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = alcaSkimWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name,
                         "/TestWorkload/Reco/AlcaSkim/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

        dqmWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged"
        )
        dqmWorkflow.load()

        logArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "merged_output_fileset"]
        unmergedLogArchOutput = dqmWorkflow.outputMap["logArchive"][0][
            "output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(
            logArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")
        self.assertEqual(
            unmergedLogArchOutput.name,
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive",
            "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name="TestWorkload",
                                     task="/TestWorkload/Reco/RecoMerge%s" %
                                     goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod, "Error: LogArchive output fileset is wrong.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)

        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(
                len(mergeWorkflow.outputMap.keys()), 2,
                "Error: Wrong number of WF outputs %d." %
                len(mergeWorkflow.outputMap.keys()))

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0][
                "output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(
                mergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Merged output fileset is wrong.")
            self.assertEqual(
                unmergedMergeOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-Merged" %
                goldenOutputMod, "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0][
                "output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(
                logArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong: %s" %
                logArchOutput.name)
            self.assertEqual(
                unmergedLogArchOutput.name,
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod,
                "Error: LogArchive output fileset is wrong.")

        topLevelFileset = Fileset(name="TestWorkload-Reco-SomeBlock")
        topLevelFileset.loadData()

        recoSubscription = Subscription(fileset=topLevelFileset,
                                        workflow=recoWorkflow)
        recoSubscription.loadData()

        self.assertEqual(recoSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            recoSubscription["split_algo"], "EventBased",
            "Error: Wrong split algorithm. %s" %
            recoSubscription["split_algo"])

        alcaRecoFileset = Fileset(
            name="/TestWorkload/Reco/unmerged-write_ALCARECO")
        alcaRecoFileset.loadData()

        alcaSkimSubscription = Subscription(fileset=alcaRecoFileset,
                                            workflow=alcaSkimWorkflow)
        alcaSkimSubscription.loadData()

        self.assertEqual(alcaSkimSubscription["type"], "Processing",
                         "Error: Wrong subscription type.")
        self.assertEqual(
            alcaSkimSubscription["split_algo"], "WMBSMergeBySize",
            "Error: Wrong split algorithm. %s" %
            alcaSkimSubscription["split_algo"])

        mergedDQMFileset = Fileset(
            name="/TestWorkload/Reco/RecoMergewrite_DQM/merged-Merged")
        mergedDQMFileset.loadData()

        dqmSubscription = Subscription(fileset=mergedDQMFileset,
                                       workflow=dqmWorkflow)
        dqmSubscription.loadData()

        self.assertEqual(dqmSubscription["type"], "Harvesting",
                         "Error: Wrong subscription type.")
        self.assertEqual(dqmSubscription["split_algo"], "Harvest",
                         "Error: Wrong split algo.")

        unmergedOutputs = ["write_RECO", "write_AOD", "write_DQM"]
        for unmergedOutput in unmergedOutputs:
            unmergedDataTier = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                       unmergedOutput)
            unmergedDataTier.loadData()
            dataTierMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s" % unmergedOutput)
            dataTierMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedDataTier,
                                             workflow=dataTierMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])
        unmergedOutputs = []
        for alcaProd in testArguments["AlcaSkims"]:
            unmergedOutputs.append("ALCARECOStream%s" % alcaProd)
        for unmergedOutput in unmergedOutputs:
            unmergedAlcaSkim = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                unmergedOutput)
            unmergedAlcaSkim.loadData()
            alcaSkimMergeWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s" %
                unmergedOutput)
            alcaSkimMergeWorkflow.load()
            mergeSubscription = Subscription(fileset=unmergedAlcaSkim,
                                             workflow=alcaSkimMergeWorkflow)
            mergeSubscription.loadData()

            self.assertEqual(mergeSubscription["type"], "Merge",
                             "Error: Wrong subscription type.")
            self.assertEqual(
                mergeSubscription["split_algo"], "WMBSMergeBySize",
                "Error: Wrong split algorithm. %s" %
                mergeSubscription["split_algo"])

        goldenOutputMods = [
            "write_RECO", "write_AOD", "write_DQM", "write_ALCARECO"
        ]
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(name="/TestWorkload/Reco/unmerged-%s" %
                                      goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            unmergedFileset = Fileset(
                name="/TestWorkload/Reco/AlcaSkim/unmerged-%s" %
                goldenOutputMod)
            unmergedFileset.loadData()
            cleanupWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/AlcaSkim/AlcaSkimCleanupUnmerged%s" %
                goldenOutputMod)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset=unmergedFileset,
                                               workflow=cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"],
                             "SiblingProcessingBased",
                             "Error: Wrong subscription type.")

        recoLogCollect = Fileset(name="/TestWorkload/Reco/unmerged-logArchive")
        recoLogCollect.loadData()
        recoLogCollectWorkflow = Workflow(name="TestWorkload",
                                          task="/TestWorkload/Reco/LogCollect")
        recoLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=recoLogCollect,
                                     workflow=recoLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        alcaSkimLogCollect = Fileset(
            name="/TestWorkload/Reco/AlcaSkim/unmerged-logArchive")
        alcaSkimLogCollect.loadData()
        alcaSkimLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task="/TestWorkload/Reco/AlcaSkim/AlcaSkimLogCollect")
        alcaSkimLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset=alcaSkimLogCollect,
                                     workflow=alcaSkimLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algorithm.")

        goldenOutputMods = ["write_RECO", "write_AOD", "write_DQM"]
        for goldenOutputMod in goldenOutputMods:
            recoMergeLogCollect = Fileset(
                name="/TestWorkload/Reco/RecoMerge%s/merged-logArchive" %
                goldenOutputMod)
            recoMergeLogCollect.loadData()
            recoMergeLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task="/TestWorkload/Reco/RecoMerge%s/Reco%sMergeLogCollect" %
                (goldenOutputMod, goldenOutputMod))
            recoMergeLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=recoMergeLogCollect,
                workflow=recoMergeLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        goldenOutputMods = []
        for alcaProd in testArguments["AlcaSkims"]:
            goldenOutputMods.append("ALCARECOStream%s" % alcaProd)
        for goldenOutputMod in goldenOutputMods:
            alcaSkimLogCollect = Fileset(
                name=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/merged-logArchive"
                % goldenOutputMod)
            alcaSkimLogCollect.loadData()
            alcaSkimLogCollectWorkflow = Workflow(
                name="TestWorkload",
                task=
                "/TestWorkload/Reco/AlcaSkim/AlcaSkimMerge%s/AlcaSkim%sMergeLogCollect"
                % (goldenOutputMod, goldenOutputMod))
            alcaSkimLogCollectWorkflow.load()
            logCollectSubscription = Subscription(
                fileset=alcaSkimLogCollect,
                workflow=alcaSkimLogCollectWorkflow)
            logCollectSubscription.loadData()

            self.assertEqual(logCollectSub["type"], "LogCollect",
                             "Error: Wrong subscription type.")
            self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                             "Error: Wrong split algorithm.")

        dqmHarvestLogCollect = Fileset(
            name=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/unmerged-logArchive"
        )
        dqmHarvestLogCollect.loadData()
        dqmHarvestLogCollectWorkflow = Workflow(
            name="TestWorkload",
            task=
            "/TestWorkload/Reco/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged/RecoMergewrite_DQMMergedDQMHarvestLogCollect"
        )
        dqmHarvestLogCollectWorkflow.load()

        logCollectSub = Subscription(fileset=dqmHarvestLogCollect,
                                     workflow=dqmHarvestLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return
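
    # The goldenOutputMods lists above are hard-coded, but they mirror
    # testArguments["WriteTiers"]: every configured data tier yields one
    # "write_<tier>" output module, plus the implicit logArchive entry (hence
    # the "+ 1" in the outputMap size checks). A hypothetical helper that
    # derives the list instead of hard-coding it could look like this,
    # assuming WriteTiers holds plain tier names such as
    # ["RECO", "ALCARECO", "AOD", "DQM"]:
    def expectedRecoOutputMods(self, testArguments):
        """Return the write_<tier> output module names for the Reco task."""
        return ["write_%s" % tier for tier in testArguments["WriteTiers"]]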
Exemplo n.º 49
0
    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        workload = promptrecoWorkload(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload,
                                'Reco',
                                'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper.createSubscription(wmbsHelper.topLevelTask,
                                      wmbsHelper.topLevelFileset)

        self.stateMap = {
            'AlcaSkim': [],
            'Merge': [],
            'Harvesting': [],
            'Processing Done': []
        }
        self.orderedStates = [
            'AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'
        ]

        # Populate WMStats
        self.wmstatsWriter.insertGenericRequest({'_id': workflowName})
        self.wmstatsWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = [
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
            '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM',
            '%s/RecoMergewrite_RECO'
        ]
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(
            name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%s' %
                                          (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s' %
                                          (topLevelTask, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-Merged'
        )
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return
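
    # In setupPromptRecoWorkflow the slice mergeTask.split('/')[-1][13:] strips
    # the 13-character "AlcaSkimMerge" prefix from the merge task name to
    # recover the ALCARECO stream, while the non-AlcaSkim branch pulls the data
    # tier off the trailing "_<TIER>" suffix. A small, self-contained sketch of
    # that mapping (hypothetical helper, plain string handling only):
    def unmergedFilesetNameFor(self, mergeTask, topLevelTask, alcaSkimTask):
        """Return the unmerged fileset name feeding a PromptReco merge task."""
        taskName = mergeTask.split('/')[-1]
        if 'AlcaSkim' in mergeTask:
            stream = taskName[len('AlcaSkimMerge'):]
            return '%s/unmerged-%s' % (alcaSkimTask, stream)
        dataTier = taskName.split('_')[-1]
        return '%s/unmerged-write_%s' % (topLevelTask, dataTier)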
Exemplo n.º 50
0
    def testMonteCarloFromGEN(self):
        """
        _testMonteCarloFromGEN_

        Create a MonteCarloFromGEN workflow and verify it installs into WMBS
        correctly.
        """
        arguments = getTestArguments()
        arguments["ConfigCacheID"] = self.injectConfig()
        arguments["CouchDBName"] = "mclhe_t"
        testWorkload = monteCarloFromGENWorkload("TestWorkload", arguments)
        testWorkload.setSpecUrl("somespec")
        testWorkload.setOwnerDetails("*****@*****.**", "DMWM")

        testWMBSHelper = WMBSHelper(testWorkload, "MonteCarloFromGEN", "SomeBlock")
        testWMBSHelper.createTopLevelFileset()
        testWMBSHelper.createSubscription(testWMBSHelper.topLevelTask, testWMBSHelper.topLevelFileset)

        procWorkflow = Workflow(name = "TestWorkload",
                                task = "/TestWorkload/MonteCarloFromGEN")
        procWorkflow.load()

        self.assertEqual(len(procWorkflow.outputMap.keys()), 3,
                         "Error: Wrong number of WF outputs.")
        self.assertEqual(procWorkflow.wfType, 'lheproduction')

        goldenOutputMods = ["outputRECORECO", "outputALCARECOALCARECO"]
        for goldenOutputMod in goldenOutputMods:
            mergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["merged_output_fileset"]
            unmergedOutput = procWorkflow.outputMap[goldenOutputMod][0]["output_fileset"]

            mergedOutput.loadData()
            unmergedOutput.loadData()

            self.assertEqual(mergedOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong: %s" % mergedOutput.name)
            self.assertEqual(unmergedOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

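        # The processing task's log archive is never merged, so both the
        # merged and unmerged entries point at the same unmerged-logArchive
        # fileset.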
        logArchOutput = procWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
        unmergedLogArchOutput = procWorkflow.outputMap["logArchive"][0]["output_fileset"]
        logArchOutput.loadData()
        unmergedLogArchOutput.loadData()

        self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")
        self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive",
                         "Error: LogArchive output fileset is wrong.")

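        # Each output module gets a dedicated merge task; check the Merged and
        # logArchive filesets of every merge workflow.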
        for goldenOutputMod in goldenOutputMods:
            mergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s" % goldenOutputMod)
            mergeWorkflow.load()

            self.assertEqual(len(mergeWorkflow.outputMap.keys()), 2,
                             "Error: Wrong number of WF outputs.")

            mergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["merged_output_fileset"]
            unmergedMergeOutput = mergeWorkflow.outputMap["Merged"][0]["output_fileset"]

            mergedMergeOutput.loadData()
            unmergedMergeOutput.loadData()

            self.assertEqual(mergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Merged output fileset is wrong.")
            self.assertEqual(unmergedMergeOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-Merged" % goldenOutputMod,
                             "Error: Unmerged output fileset is wrong.")

            logArchOutput = mergeWorkflow.outputMap["logArchive"][0]["merged_output_fileset"]
            unmergedLogArchOutput = mergeWorkflow.outputMap["logArchive"][0]["output_fileset"]
            logArchOutput.loadData()
            unmergedLogArchOutput.loadData()

            self.assertEqual(logArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong: %s" % logArchOutput.name)
            self.assertEqual(unmergedLogArchOutput.name, "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMerge%s/merged-logArchive" % goldenOutputMod,
                             "Error: LogArchive output fileset is wrong.")

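        # The top-level fileset is named after the workload, the task and the
        # input block; its subscription is Production-type with LumiBased
        # splitting.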
        topLevelFileset = Fileset(name = "TestWorkload-MonteCarloFromGEN-SomeBlock")
        topLevelFileset.loadData()

        procSubscription = Subscription(fileset = topLevelFileset, workflow = procWorkflow)
        procSubscription.loadData()

        self.assertEqual(procSubscription["type"], "Production",
                         "Error: Wrong subscription type: %s" % procSubscription["type"])
        self.assertEqual(procSubscription["split_algo"], "LumiBased",
                         "Error: Wrong split algo.")

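        # Merge subscriptions consume the unmerged output filesets and use the
        # ParentlessMergeBySize splitting algorithm.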
        unmergedReco = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputRECORECO")
        unmergedReco.loadData()
        recoMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO")
        recoMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedReco, workflow = recoMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

        unmergedAlca = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-outputALCARECOALCARECO")
        unmergedAlca.loadData()
        alcaMergeWorkflow = Workflow(name = "TestWorkload",
                                     task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO")
        alcaMergeWorkflow.load()
        mergeSubscription = Subscription(fileset = unmergedAlca, workflow = alcaMergeWorkflow)
        mergeSubscription.loadData()

        self.assertEqual(mergeSubscription["type"], "Merge",
                         "Error: Wrong subscription type.")
        self.assertEqual(mergeSubscription["split_algo"], "ParentlessMergeBySize",
                         "Error: Wrong split algo: %s" % mergeSubscription["split_algo"])

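        # Cleanup subscriptions watch the unmerged filesets and use
        # SiblingProcessingBased splitting.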
        for procOutput in ["outputRECORECO", "outputALCARECOALCARECO"]:
            unmerged = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-%s" % procOutput)
            unmerged.loadData()
            cleanupWorkflow = Workflow(name = "TestWorkload",
                                      task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENCleanupUnmerged%s" % procOutput)
            cleanupWorkflow.load()
            cleanupSubscription = Subscription(fileset = unmerged, workflow = cleanupWorkflow)
            cleanupSubscription.loadData()

            self.assertEqual(cleanupSubscription["type"], "Cleanup",
                             "Error: Wrong subscription type.")
            self.assertEqual(cleanupSubscription["split_algo"], "SiblingProcessingBased",
                             "Error: Wrong split algo.")

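        # LogCollect subscriptions gather log archives from the processing
        # task and from each merge task, all split with MinFileBased.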
        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/unmerged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/LogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputRECORECO/MonteCarloFromGENoutputRECORECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        procLogCollect = Fileset(name = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/merged-logArchive")
        procLogCollect.loadData()
        procLogCollectWorkflow = Workflow(name = "TestWorkload",
                                          task = "/TestWorkload/MonteCarloFromGEN/MonteCarloFromGENMergeoutputALCARECOALCARECO/MonteCarloFromGENoutputALCARECOALCARECOMergeLogCollect")
        procLogCollectWorkflow.load()
        logCollectSub = Subscription(fileset = procLogCollect, workflow = procLogCollectWorkflow)
        logCollectSub.loadData()

        self.assertEqual(logCollectSub["type"], "LogCollect",
                         "Error: Wrong subscription type.")
        self.assertEqual(logCollectSub["split_algo"], "MinFileBased",
                         "Error: Wrong split algo.")

        return