Exemplo n.º 1
0
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist.

        Only blocks containing whitelisted runs should produce work units,
        and the total job count must match the whitelisted lumi count.
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        # NOTE(review): removed an unused local that rebuilt the dataset path
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        jobs += runLumis[run]
            self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
Exemplo n.º 2
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # a spec whose input dataset was removed must raise a spec error
        noDatasetSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(noDatasetSpec).data.input.dataset = None
        for task in noDatasetSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), noDatasetSpec, task)

        # a bogus DBS url is also a spec error
        badDbsSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(badDbsSpec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in badDbsSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), badDbsSpec, task)

        # a dataset name that does not exist yields no work
        missingDatasetSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(missingDatasetSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in missingDatasetSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), missingDatasetSpec, task)

        # a run whitelist matching nothing in the dataset yields no work
        badRunsSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        badRunsSpec.setRunWhitelist([666]) # not in this dataset
        for task in badRunsSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), badRunsSpec, task)

        # blocks with 0 files are skipped: with every block forced to 0
        # files no work can be found and an error must be raised
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        emptyBlocksSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in emptyBlocksSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), emptyBlocksSpec, task)
        Globals.GlobalParams.resetParams()
Exemplo n.º 3
0
 def testRunWhitelist(self):
     """
     ReReco lumi split with Run whitelist
     This test may not do much of anything anymore since listRunLumis is not in DBS3
     """
     # get files with multiple runs
     Globals.GlobalParams.setNumOfRunsPerFile(2)
     # a large number of lumis to ensure we get multiple runs
     Globals.GlobalParams.setNumOfLumisPerBlock(10)
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setRunWhitelist([2, 3])
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     # NOTE(review): removed an unused local dataset-path string and a
     # leftover Python-2 debug print statement
     dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         # Check number of jobs in element match number for
         # dataset in run whitelist
         wq_jobs = 0
         for unit in units:
             wq_jobs += unit['Jobs']
             runLumis = dbs[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
             for run in runLumis:
                 if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                     self.assertEqual(runLumis[run], None)  # This is what it is with DBS3 unless we calculate it
         self.assertEqual(75, int(wq_jobs))
Exemplo n.º 4
0
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist.

        Only blocks containing whitelisted runs should produce work units,
        and the total job count must match the whitelisted lumi count.
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        # NOTE(review): removed an unused local that rebuilt the dataset path
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 0)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(block = unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist():
                        jobs += runLumis[run]
            self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
Exemplo n.º 5
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # a spec with no input dataset raises a spec error
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset = None
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), spec, task)

        # an unreachable dbs url surfaces as a DBS reader error
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # a dataset unknown to DBS also raises a DBS reader error
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # a run whitelist with no overlap produces no work
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        spec.setRunWhitelist([666])  # not in this dataset
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), spec, task)
Exemplo n.º 6
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        dqmHarvArgs = getRequestArgs()
        dqmHarvArgs["DQMConfigCacheID"] = createConfig(dqmHarvArgs["CouchDBName"])
        factory = DQMHarvestWorkloadFactory()

        # no input dataset -> spec error
        workload = factory.factoryWorkloadConstruction('NoInputDatasetTest', dqmHarvArgs)
        workload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
        getFirstTask(workload).data.input.dataset = None
        for task in workload.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), workload, task)

        # dataset name unknown to DBS -> reader error
        workload = factory.factoryWorkloadConstruction('InvalidInputDatasetTest', dqmHarvArgs)
        workload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
        getFirstTask(workload).data.input.dataset.name = '/MinimumBias/FAKE-Filter-v1/RECO'
        for task in workload.taskIterator():
            self.assertRaises(DBSReaderError, Dataset(), workload, task)

        # run whitelist with no overlap -> no work
        workload = factory.factoryWorkloadConstruction('InvalidRunNumberTest', dqmHarvArgs)
        workload.setSiteWhitelist(["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"])
        workload.setRunWhitelist([666])  # not in this dataset
        for task in workload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), workload, task)
Exemplo n.º 7
0
    def testRunWhitelist(self):
        """ReReco lumi split with Run whitelist.

        The job count of the single dataset-level unit must match the
        number of whitelisted runs across all inputs.
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(2)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(10)
        splitArgs = dict(SliceType = 'NumberOfLumis', SliceSize = 1)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        # NOTE(review): removed an unused local that rebuilt the dataset path
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            # Check number of jobs in element match number for
            # dataset in run whitelist
            jobs = 0
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                runs = dbs[inputDataset.dbsurl].listRuns(unit['Inputs'].keys()[0])
                jobs += len([x for x in runs if x in getFirstTask(Tier1ReRecoWorkload).inputRunWhitelist()])
            self.assertEqual(int(jobs / splitArgs['SliceSize'] ) , int(wq_jobs))
Exemplo n.º 8
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.setRunWhitelist([181061, 181175])
        workload.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(workload).inputDataset()
        readers = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in workload.taskIterator():
            units, _ = Dataset(**splitArgs)(workload, task)
            self.assertEqual(1, len(units))
            # the element job count must match the whitelisted dataset runs
            totalJobs = 0
            for unit in units:
                totalJobs += unit['Jobs']
                runLumis = readers[inputDataset.dbsurl].listRunLumis(dataset=unit['Inputs'].keys()[0])
                whitelist = getFirstTask(workload).inputRunWhitelist()
                for run in runLumis:
                    if run in whitelist:
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(250, int(totalJobs))
Exemplo n.º 9
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.setStartPolicy('Block', **splitArgs)
        workload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(workload).inputDataset()

        readers = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in workload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(workload, task)
            # only blocks 1 and 2 overlap the whitelisted runs
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # the element job counts should reflect the whitelisted runs
            totalJobs = 0
            for unit in units:
                totalJobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3,
                # returning None for the # of lumis in a run
                runLumis = readers[inputDataset.dbsurl].listRunLumis(block=unit['Inputs'].keys()[0])
                whitelist = getFirstTask(workload).inputRunWhitelist()
                for run in runLumis:
                    if run in whitelist:
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(2, int(totalJobs))
Exemplo n.º 10
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # missing input dataset -> spec error
        spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset = None
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, task)

        # malformed dbs url -> spec error
        spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, task)

        # unknown primary dataset -> no work
        spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, task)

        # run whitelist with no matching runs -> no work
        spec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        spec.setRunWhitelist([666]) # not in this dataset
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, task)
Exemplo n.º 11
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        def makeSpec():
            # each scenario starts from a fresh, fully-assigned workload
            return rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                  assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})

        # no dataset -> spec error
        spec = makeSpec()
        getFirstTask(spec).data.input.dataset = None
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), spec, task)

        # invalid dbs url -> DBS reader error
        spec = makeSpec()
        getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # dataset non existent -> DBS reader error
        spec = makeSpec()
        getFirstTask(spec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # run whitelist matching nothing -> no work
        spec = makeSpec()
        spec.setRunWhitelist([666])  # not in this dataset
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), spec, task)
Exemplo n.º 12
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        def buildSpec():
            # fresh spec per scenario so mutations don't leak between cases
            return factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)

        # missing input dataset -> spec error
        spec = buildSpec()
        getFirstTask(spec).data.input.dataset = None
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, task)

        # malformed dbs url -> spec error
        spec = buildSpec()
        getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), spec, task)

        # unknown primary dataset -> no work
        spec = buildSpec()
        getFirstTask(spec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, task)

        # run whitelist with no matching runs -> no work
        spec = buildSpec()
        spec.setRunWhitelist([666]) # not in this dataset
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), spec, task)
Exemplo n.º 13
0
 def testInvalidSpecs(self):
     """Specs with no work"""
     # a production spec configured for zero events must yield no work
     zeroEventSpec = monteCarloWorkload('testProcessingInvalid', mcArgs)
     getFirstTask(zeroEventSpec).addProduction(totalevents = 0)
     for task in zeroEventSpec.taskIterator():
         self.assertRaises(WorkQueueNoWorkError, MonteCarlo(), zeroEventSpec, task)
Exemplo n.º 14
0
    def testMultiMergeProductionWorkload(self):
        """Multi merge production workload.

        Splitting the multi-merge production workload should create ten
        one-job units per task, each pointing back at the spec and task.
        """
        getFirstTask(MultiMergeProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in MultiMergeProductionWorkload.taskIterator():
            units = MonteCarlo(**self.splitArgs)(MultiMergeProductionWorkload, task)

            # compare the unit count against an int, not the float 10.0
            self.assertEqual(10, len(units))
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(unit['WMSpec'], MultiMergeProductionWorkload)
                self.assertEqual(unit['Task'], task)
Exemplo n.º 15
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        # zero requested events means there is no work to queue
        zeroEventsSpec = monteCarloWorkload('testProcessingInvalid', mcArgs)
        getFirstTask(zeroEventsSpec).addProduction(totalevents = 0)
        for task in zeroEventsSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, MonteCarlo(), zeroEventsSpec, task)

        # a negative slice size is a malformed spec
        negativeSliceSpec = monteCarloWorkload('testProdInvalid', mcArgs)
        negativeSliceSpec.data.policies.start.SliceSize = -100
        for task in negativeSliceSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, MonteCarlo(), negativeSliceSpec, task)
Exemplo n.º 16
0
    def _getLHEProductionWorkload(self, splitArgs, totalevents=1010):
        """Build a MonteCarlo workload configured for event-based splitting.

        :param splitArgs: dict providing 'SliceSize' (events per job) and
            'SubSliceSize' (events per lumi)
        :param totalevents: total number of events to produce; defaults to
            1010, the previously hard-coded fixture value
        :return: the configured workload
        """
        LHEProductionWorkload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
        LHEProductionWorkload.setJobSplittingParameters(
            getFirstTask(LHEProductionWorkload).getPathName(),
            'EventBased',
            {'events_per_job': splitArgs['SliceSize'],
             'events_per_lumi': splitArgs['SubSliceSize']})
        getFirstTask(LHEProductionWorkload).addProduction(totalEvents=totalevents)
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])

        return LHEProductionWorkload
Exemplo n.º 17
0
    def testShiftedStartSplitting(self):
        """
        _testShiftedStartSplitting_

        Make sure that splitting by event plus events in per lumi works
        when the first event and lumi is not 1

        """
        totalevents = 542674
        # 47 events per job, 13 events per lumi, at most 5 jobs per element
        splitArgs = dict(SliceType = 'NumEvents', SliceSize = 47, MaxJobsPerElement = 5,
                         SubSliceType = 'NumEventsPerLumi', SubSliceSize = 13)

        LHEProductionWorkload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
        LHEProductionWorkload.setJobSplittingParameters(
            getFirstTask(LHEProductionWorkload).getPathName(),
            'EventBased',
            {'events_per_job': splitArgs['SliceSize'],
             'events_per_lumi': splitArgs['SubSliceSize']})
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        getFirstTask(LHEProductionWorkload).addProduction(totalevents = totalevents)
        # shift the start: events begin at 50 and lumis at 100 instead of 1
        getFirstTask(LHEProductionWorkload).setFirstEventAndLumi(50,100)
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in LHEProductionWorkload.taskIterator():
            units, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)

            SliceSize = LHEProductionWorkload.startPolicyParameters()['SliceSize']
            # each element covers up to MaxJobsPerElement jobs of SliceSize events
            self.assertEqual(math.ceil(float(totalevents) / (SliceSize * splitArgs['MaxJobsPerElement'])),
                             len(units))
            first_event = 50
            first_lumi = 100
            first_run = 1
            # lumis needed to cover one job's worth of events
            lumis_per_job = int(math.ceil(float(SliceSize) /
                                splitArgs['SubSliceSize']))
            # walk the units, checking each mask picks up exactly where
            # the previous one left off
            for unit in units:
                self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement'])
                self.assertEqual(unit['WMSpec'], LHEProductionWorkload)
                self.assertEqual(unit['Task'], task)
                self.assertEqual(unit['Mask']['FirstEvent'], first_event)
                self.assertEqual(unit['Mask']['FirstLumi'], first_lumi)
                last_event = first_event + (SliceSize * unit['Jobs']) - 1
                last_lumi = first_lumi + (lumis_per_job * unit['Jobs']) - 1
                if last_event > totalevents:
                    # this should be the last unit of work
                    # the final element is shorter: recompute its last event
                    # (offset by the shifted start of 50) and the partial
                    # final job's lumi count
                    last_event = totalevents + 50 - 1
                    last_lumi = first_lumi
                    last_lumi += math.ceil(((last_event - first_event + 1) %
                                SliceSize) / splitArgs['SubSliceSize'])
                    last_lumi += (lumis_per_job * (unit['Jobs'] - 1)) - 1
                self.assertEqual(unit['Mask']['LastEvent'], last_event)
                self.assertEqual(unit['Mask']['LastLumi'], last_lumi)
                self.assertEqual(unit['Mask']['FirstRun'], first_run)
                first_event = last_event + 1
                first_lumi  = last_lumi + 1
            # the last unit must end exactly at the shifted final event
            self.assertEqual(unit['Mask']['LastEvent'], totalevents + 50 - 1)
Exemplo n.º 18
0
    def testMultiTaskProcessingWorkload(self):
        """Multi Task Processing Workflow"""
        taskCount = 0
        getFirstTask(MultiTaskProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in MultiTaskProductionWorkload.taskIterator():
            taskCount += 1
            task.setFirstEventAndLumi(1, 1)
            units, _ = MonteCarlo(**self.splitArgs)(MultiTaskProductionWorkload, task)

            # each successive task yields another batch of 10 units
            self.assertEqual(10 * taskCount, len(units))
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(unit['WMSpec'], MultiTaskProductionWorkload)
                self.assertEqual(unit['Task'], task)
        # the workload is expected to contain exactly two tasks
        self.assertEqual(taskCount, 2)
Exemplo n.º 19
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     workload = rerecoWorkload(
         'ReRecoWorkload',
         rerecoArgs,
         assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
     workload.data.request.priority = 69
     inputDataset = getFirstTask(workload).inputDataset()
     datasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                  inputDataset.tier)
     reader = DBSReader(inputDataset.dbsurl)
     for task in workload.taskIterator():
         units, _, _ = Block(**self.splitArgs)(workload, task)
         # one work unit per file block in the dataset
         self.assertEqual(47, len(units))
         for unit in units:
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(workload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(len(units), len(reader.listFileBlocks(datasetPath)))
Exemplo n.º 20
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(), len(units))
            # NOTE(review): removed an unused `blocks` accumulator that was
            # declared but never filled
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Exemplo n.º 21
0
    def notestParentProcessing(self):
        # Does not work with a RAW dataset, need a different workload
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(
            parentProcArgs["CouchDBName"])
        spec = ReRecoWorkloadFactory().factoryWorkloadConstruction(
            'testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(spec).inputDataset()
        datasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                     inputDataset.tier)
        reader = DBSReader(inputDataset.dbsurl)
        for task in spec.taskIterator():
            units, _, _ = Block(**self.splitArgs)(spec, task)
            # one work unit per file block
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(spec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units), len(reader.listFileBlocks(datasetPath)))
Exemplo n.º 22
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        parentProcSpec = rerecoWorkload('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units = Block(**self.splitArgs)(parentProcSpec, task)
            self.assertEqual(Globals.GlobalParams.numOfBlocksPerDataset(),
                             len(units))
            # NOTE(review): removed an unused `blocks` accumulator that was
            # declared but never filled
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Exemplo n.º 23
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        workload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.data.request.priority = 69
        task = getFirstTask(workload)
        dummyDataset = task.inputDataset()

        # restrict splitting to a run/lumi mask on the task input
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={'206371': [[1, 50], [60, 70]], '180899': [[1, 1]], })

        units, dummyRejectedWork = Block(**self.splitArgs)(workload, task)

        # the units together must cover exactly the lumis in the mask
        maskedLumiCount = sum(unit['NumberOfLumis'] for unit in units)
        self.assertEqual(len(lumiMask.getLumis()), maskedLumiCount)
Exemplo n.º 24
0
    def notestParentProcessing(self):
        # Does not work with a RAW dataset, need a different workload
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.

        Disabled ("notest" prefix) until a non-RAW input workload is available.
        """
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs)

        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Block(**self.splitArgs)(parentProcSpec, task)
            # 47 blocks expected in the input dataset
            self.assertEqual(47, len(units))
            for unit in units:
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(1, len(unit['ParentData']))
            self.assertEqual(len(units),
                             len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Exemplo n.º 25
0
 def testParentProcessing(self):
     """
     test parent processing: should have the same results as rereco test
     with the parent flag and dataset.

     Splits by Dataset (SliceSize=2 lumis) and checks job/lumi counts for
     the whole dataset in a single work unit.
     """
     splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
     parentProcArgs = getReRecoArgs(parent=True)
     parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
     # This dataset does have parents. Adding it here to keep the test going.
     # It seems like "dbs" below is never used
     parentProcArgs2 = {}
     parentProcArgs2.update(parentProcArgs)
     parentProcArgs2.update({'InputDataset': '/Cosmics/ComissioningHI-PromptReco-v1/RECO'})
     parentProcSpec = rerecoWorkload('ReRecoWorkload', parentProcArgs2,
                                     assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
     parentProcSpec.setStartPolicy('Dataset', **splitArgs)
     inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
     for task in parentProcSpec.taskIterator():
         units, _, _ = Dataset(**splitArgs)(parentProcSpec, task)
         # Dataset policy yields one unit covering the whole dataset
         self.assertEqual(1, len(units))
         for unit in units:
             # 7985 lumis split 2 per job -> 3993 jobs
             self.assertEqual(3993, unit['Jobs'])
             self.assertEqual(7985, unit['NumberOfLumis'])
             self.assertEqual(parentProcSpec, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [inputDataset])
             self.assertEqual(True, unit['ParentFlag'])
             self.assertEqual(0, len(unit['ParentData']))
Exemplo n.º 26
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information

        Points the spec at a StoreResults USER dataset with known run/lumi
        content, applies a run/lumi mask on the task, then verifies that each
        file's masked lumi list equals the intersection of the file's lumis
        with the mask.
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        # Redirect the input dataset at a sample with known lumi ranges
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        # Mask: lumis are flat 'start,end[,start,end...]' strings per run
        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(compactList={'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]], })
        # Lumis actually present in the input files, per run
        inputLumis = LumiList(compactList={'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]], })
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(task, dbs, dataset)
        # Every file's masked lumi list must equal inputLumis & lumiMask
        for dummyBlock, files in maskedBlocks.iteritems():
            for dummyFile, lumiList in files.iteritems():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Exemplo n.º 27
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks
        """

        workload = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        workload.data.request.priority = 69
        firstTask = getFirstTask(workload)
        dummyDataset = firstTask.inputDataset()

        # Run/lumi mask applied through the task splitting parameters
        firstTask.data.input.splitting.runs = [181061, 180899]
        firstTask.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList={
            '206371': [[1, 50], [60, 70]],
            '180899': [[1, 1]],
        })

        units, _, _ = Block(**self.splitArgs)(workload, firstTask)

        # Sum of lumis over all work units must match the mask exactly
        totalLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), totalLumis)
Exemplo n.º 28
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.
        """
        # Use the parent-processing args' own couch DB name; previously this
        # read rerecoArgs["CouchDBName"], which only worked when both arg
        # dicts happened to share the same couch instance (siblings use
        # parentProcArgs["CouchDBName"]).
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going. It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **self.splitArgs)
        inputDataset = getFirstTask(parentProcSpec).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**self.splitArgs)(parentProcSpec, task)
            # Dataset policy yields a single unit for the whole dataset
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [dataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Exemplo n.º 29
0
    def testParentProcessing(self):
        """
        test parent processing: should have the same results as rereco test
        with the parent flag and dataset.

        Dataset-policy split (2 lumis per job) over a parented RECO dataset;
        expects a single work unit with the parent flag set.
        """
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)
        parentProcArgs = getReRecoArgs(parent=True)
        parentProcArgs["ConfigCacheID"] = createConfig(parentProcArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # This dataset does have parents. Adding it here to keep the test going.
        # It seems like "dbs" below is never used
        parentProcArgs2 = {}
        parentProcArgs2.update(parentProcArgs)
        parentProcArgs2.update({'InputDataset': '/SingleMu/CMSSW_6_2_0_pre4-PRE_61_V1_RelVal_mu2012A-v1/RECO'})
        parentProcSpec = factory.factoryWorkloadConstruction('testParentProcessing', parentProcArgs2)
        parentProcSpec.setStartPolicy('Dataset', **splitArgs)
        inputDataset = getFirstTask(parentProcSpec).getInputDatasetPath()
        for task in parentProcSpec.taskIterator():
            units, _ = Dataset(**splitArgs)(parentProcSpec, task)
            self.assertEqual(1, len(units))
            for unit in units:
                # 1694 lumis / 2 lumis per job -> 847 jobs
                self.assertEqual(847, unit['Jobs'])
                self.assertEqual(1694, unit['NumberOfLumis'])
                self.assertEqual(parentProcSpec, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertEqual(unit['Inputs'].keys(), [inputDataset])
                self.assertEqual(True, unit['ParentFlag'])
                self.assertEqual(0, len(unit['ParentData']))
Exemplo n.º 30
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Test job splitting with masked blocks

        Configures the data emulator, applies a run/lumi mask on the task,
        and checks that the split work units cover exactly the masked lumis.
        """

        # Shape the emulated data: 3 runs per file, 5 lumis per block
        Globals.GlobalParams.setNumOfRunsPerFile(3)
        Globals.GlobalParams.setNumOfLumisPerBlock(5)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()

        # NOTE(review): the mask lists run 206371 while splitting.runs has
        # 181061 — presumably intentional mirroring of the sibling tests;
        # confirm against the expected lumi count below.
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        lumiMask = LumiList(compactList = {'206371': [[1, 50], [60,70]], '180899':[[1,1]], } )

        # NOTE(review): dataset/dbs are built but never used in this test
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed,
                                     inputDataset.tier)
        dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
        units, rejectedWork = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)

        # Sum of lumis over all work units must match the mask
        nLumis = 0
        for unit in units:
            nLumis += unit['NumberOfLumis']

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Exemplo n.º 31
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow

     Block-policy split of a standard ReReco spec: one work unit per DBS
     block, each carrying the request priority and sane job/lumi/file/event
     counts.
     """
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.data.request.priority = 69
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary,
                              inputDataset.processed,
                              inputDataset.tier)
     dbs = {inputDataset.dbsurl : DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Block(**self.splitArgs)(Tier1ReRecoWorkload, task)
         # 47 blocks expected in the input dataset
         self.assertEqual(47, len(units))
         for unit in units:
             # Request priority must be propagated into each unit
             self.assertEqual(69, unit['Priority'])
             self.assertTrue(1 <= unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertTrue(1 <= unit['NumberOfLumis'])
             self.assertTrue(1 <= unit['NumberOfFiles'])
             self.assertTrue(0 <= unit['NumberOfEvents'])
         self.assertEqual(len(units),
                          len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))
Exemplo n.º 32
0
    def testLumiMask(self):
        """Lumi mask test"""
        maskedArgs = {}
        maskedArgs.update(rerecoArgs)
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', maskedArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Restrict the spec to run 1, lumi 1 and split by Block
        maskedWorkload = rerecoWorkload('ReRecoWorkload', maskedArgs)
        firstTask = getFirstTask(maskedWorkload)
        firstTask.data.input.splitting.runs = ['1']
        firstTask.data.input.splitting.lumis = ['1,1']
        units = Block(**self.splitArgs)(maskedWorkload, firstTask)
        self.assertEqual(len(units), 1)
Exemplo n.º 33
0
    def _getLHEProductionWorkload(self, splitArgs):
        """Build an LHE MonteCarlo workload configured from *splitArgs*.

        Uses SliceSize as events-per-job and SubSliceSize as events-per-lumi,
        adds a fixed 1010-event production, and whitelists two sites.
        """
        totalEvents = 1010

        workload = monteCarloWorkload('MonteCarloWorkload', mcArgs)
        firstTask = getFirstTask(workload)
        workload.setJobSplittingParameters(
            firstTask.getPathName(), 'EventBased',
            {'events_per_job': splitArgs['SliceSize'],
             'events_per_lumi': splitArgs['SubSliceSize']})
        firstTask.addProduction(totalEvents=totalEvents)
        firstTask.setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])

        return workload
Exemplo n.º 34
0
    def testMultiTaskProcessingWorkload(self):
        """Multi Task Processing Workflow"""
        taskCount = 0
        getFirstTask(MultiTaskProductionWorkload).setSiteWhitelist(
            ['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in MultiTaskProductionWorkload.taskIterator():
            taskCount += 1
            task.setFirstEventAndLumi(1, 1)
            units, _, _ = MonteCarlo(**self.splitArgs)(
                MultiTaskProductionWorkload, task)

            # Each successive task yields ten more units than the previous
            self.assertEqual(10 * taskCount, len(units))
            for unit in units:
                self.assertEqual(1, unit['Jobs'])
                self.assertEqual(unit['WMSpec'], MultiTaskProductionWorkload)
                self.assertEqual(unit['Task'], task)
        # The workload defines exactly two tasks
        self.assertEqual(taskCount, 2)
Exemplo n.º 35
0
    def testShiftedStartSplitting(self):
        """
        _testShiftedStartSplitting_

        Make sure that splitting by event plus events in per lumi works
        when the first event and lumi is not 1

        Walks every work unit's event/lumi mask and recomputes the expected
        boundaries from SliceSize, SubSliceSize and MaxJobsPerElement.
        """
        totalevents = 542674
        splitArgs = dict(SliceType='NumEvents', SliceSize=47, MaxJobsPerElement=5, SubSliceType='NumEventsPerLumi',
                         SubSliceSize=13)

        LHEProductionWorkload = taskChainWorkload('MonteCarloWorkload', mcArgs)
        LHEProductionWorkload.setJobSplittingParameters(getFirstTask(LHEProductionWorkload).getPathName(), 'EventBased',
                                                        {'events_per_job': splitArgs['SliceSize'],
                                                         'events_per_lumi': splitArgs['SubSliceSize']})
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        getFirstTask(LHEProductionWorkload).addProduction(totalEvents=totalevents)
        # Start counting at event 50 / lumi 100 instead of 1 / 1
        getFirstTask(LHEProductionWorkload).setFirstEventAndLumi(50, 100)
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in LHEProductionWorkload.taskIterator():
            units, _, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)

            SliceSize = LHEProductionWorkload.startPolicyParameters()['SliceSize']
            # ceil(total / (events-per-job * max-jobs-per-unit)) units expected
            self.assertEqual(math.ceil(totalevents / (SliceSize * splitArgs['MaxJobsPerElement'])), len(units))
            first_event = 50
            first_lumi = 100
            first_run = 1
            lumis_per_job = int(math.ceil(SliceSize / splitArgs['SubSliceSize']))
            for unit in units:
                self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement'])
                self.assertEqual(unit['WMSpec'], LHEProductionWorkload)
                self.assertEqual(unit['Task'], task)
                self.assertEqual(unit['Mask']['FirstEvent'], first_event)
                self.assertEqual(unit['Mask']['FirstLumi'], first_lumi)
                last_event = first_event + (SliceSize * unit['Jobs']) - 1
                last_lumi = first_lumi + (lumis_per_job * unit['Jobs']) - 1
                if last_event > totalevents:
                    # this should be the last unit of work
                    last_event = totalevents + 50 - 1
                    last_lumi = first_lumi
                    last_lumi += math.ceil(((last_event - first_event + 1) % SliceSize) / splitArgs['SubSliceSize'])
                    last_lumi += (lumis_per_job * (unit['Jobs'] - 1)) - 1
                self.assertEqual(unit['Mask']['LastEvent'], last_event)
                self.assertEqual(unit['Mask']['LastLumi'], last_lumi)
                self.assertEqual(unit['Mask']['FirstRun'], first_run)
                first_event = last_event + 1
                first_lumi = last_lumi + 1
            # Last unit must end exactly at the shifted event total
            self.assertEqual(unit['Mask']['LastEvent'], totalevents + 50 - 1)
Exemplo n.º 36
0
    def testLumiMask(self):
        """Lumi mask test"""
        maskedArgs = {}
        maskedArgs.update(rerecoArgs)
        maskedArgs["ConfigCacheID"] = createConfig(maskedArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', maskedArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        # Restrict to a single lumi of run 180992 and split by Block
        maskedWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload',
                                                             maskedArgs)
        firstTask = getFirstTask(maskedWorkload)
        firstTask.data.input.splitting.runs = ['180992']
        firstTask.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(maskedWorkload, firstTask)
        # One block survives the mask; the remaining 46 are rejected
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
Exemplo n.º 37
0
    def testExtremeSplits(self):
        """
        _testExtremeSplits_

        Make sure that the protection to avoid going over 2^32 works

        2^34 events at 2^30 events/job and max 7 jobs/unit should yield
        exactly three units with clamped event masks.
        """
        totalevents = 2**34
        splitArgs = dict(SliceType='NumEvents',
                         SliceSize=2**30,
                         MaxJobsPerElement=7)

        LHEProductionWorkload = monteCarloWorkload('MonteCarloWorkload',
                                                   mcArgs)
        LHEProductionWorkload.setJobSplittingParameters(
            getFirstTask(LHEProductionWorkload).getPathName(), 'EventBased', {
                'events_per_job': splitArgs['SliceSize'],
                'events_per_lumi': splitArgs['SliceSize']
            })
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(
            ['T2_XX_SiteA', 'T2_XX_SiteB'])
        getFirstTask(LHEProductionWorkload).addProduction(
            totalEvents=totalevents)
        getFirstTask(LHEProductionWorkload).setSiteWhitelist(
            ['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in LHEProductionWorkload.taskIterator():
            units, _, _ = MonteCarlo(**splitArgs)(LHEProductionWorkload, task)

            SliceSize = LHEProductionWorkload.startPolicyParameters(
            )['SliceSize']
            # ceil(2^34 / (2^30 * 7)) == 3 units
            self.assertEqual(
                math.ceil(totalevents /
                          (SliceSize * splitArgs['MaxJobsPerElement'])),
                len(units))
            self.assertEqual(len(units), 3, "Should produce 3 units")

            unit1 = units[0]
            unit2 = units[1]
            unit3 = units[2]

            # Units 1 and 2 are full (7 jobs); unit 3 holds the remainder.
            # FirstEvent/LastEvent are clamped so masks never exceed 2^32.
            self.assertEqual(unit1['Jobs'], 7,
                             'First unit produced more jobs than expected')
            self.assertEqual(unit1['Mask']['FirstEvent'], 1,
                             'First unit has a wrong first event')
            self.assertEqual(unit1['Mask']['LastEvent'], 7 * (2**30),
                             'First unit has a wrong last event')
            self.assertEqual(unit2['Jobs'], 7,
                             'Second unit produced more jobs than expected')
            self.assertEqual(unit2['Mask']['FirstEvent'], 2**30 + 1,
                             'Second unit has a wrong first event')
            self.assertEqual(unit2['Mask']['LastEvent'], 8 * (2**30),
                             'Second unit has a wrong last event')
            self.assertEqual(unit3['Jobs'], 2,
                             'Third unit produced more jobs than expected')
            self.assertEqual(unit3['Mask']['FirstEvent'], 2 * (2**30) + 1,
                             'Third unit has a wrong first event')
            self.assertEqual(unit3['Mask']['LastEvent'], 4 * (2**30),
                             'First unit has a wrong last event')
Exemplo n.º 38
0
    def testInvalidSpecs(self):
        """Specs with no work

        Four failure modes of Block splitting: missing dataset, bad DBS
        URL, non-existent dataset, and a run whitelist matching nothing.
        """
        # no dataset
        processingSpec = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # dataset non existent
        processingSpec = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        getFirstTask(
            processingSpec
        ).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in processingSpec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), processingSpec, task)

        # invalid run whitelist
        processingSpec = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
Exemplo n.º 39
0
    def testLumiMask(self):
        """Lumi mask test"""
        maskedArgs = {}
        maskedArgs.update(rerecoArgs)
        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', maskedArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary,
                                 inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Restrict the spec to run 1, lumi 1 and split by Block
        maskedWorkload = rerecoWorkload('ReRecoWorkload', maskedArgs)
        firstTask = getFirstTask(maskedWorkload)
        firstTask.data.input.splitting.runs = ['1']
        firstTask.data.input.splitting.lumis = ['1,1']
        units = Block(**self.splitArgs)(maskedWorkload, firstTask)
        self.assertEqual(len(units), 1)
Exemplo n.º 40
0
    def testLumiMask(self):
        """Lumi mask test"""
        maskedArgs = {}
        maskedArgs.update(rerecoArgs)
        maskedArgs["ConfigCacheID"] = createConfig(maskedArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', maskedArgs)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        # Keep only run 180992, lumi 1; split by Block
        maskedWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', maskedArgs)
        firstTask = getFirstTask(maskedWorkload)
        firstTask.data.input.splitting.runs = ['180992']
        firstTask.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(maskedWorkload, firstTask)
        # One block matches the mask; the other 46 are rejected
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
Exemplo n.º 41
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        # Make every emulated block empty
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        workload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        workload.setRunWhitelist([2, 3])
        dummyInputDataset = getFirstTask(workload).inputDataset()

        # With no files anywhere, Block splitting must refuse to make work
        for task in workload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs), workload, task)
Exemplo n.º 42
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """
        # get files with multiple runs
        Globals.GlobalParams.setNumOfRunsPerFile(8)
        # a large number of lumis to ensure we get multiple runs
        Globals.GlobalParams.setNumOfLumisPerBlock(20)
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        # Only these two runs should survive the whitelist
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork = Block(**splitArgs)(Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3, returning None for the # of lumis in a run
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                    block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(
                            Tier1ReRecoWorkload).inputRunWhitelist():
                        self.assertEqual(
                            runLumis[run], None
                        )  # This is what it is with DBS3 unless we calculate it
            # One job per unit with SliceSize=1 over two matched blocks
            self.assertEqual(2, int(wq_jobs))
Exemplo n.º 43
0
    def testInvalidSpecs(self):
        """Specs with no work

        Five failure modes of Block splitting: missing dataset, bad DBS
        URL, fake primary dataset, empty run whitelist match, and blocks
        with zero files.
        """
        # no dataset
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dbs url
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(
            processingSpec
        ).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), processingSpec,
                              task)

        # invalid dataset name
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec
                     ).data.input.dataset.primary = Globals.NOT_EXIST_DATASET
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # invalid run whitelist
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)

        # blocks with 0 files are skipped
        # set all blocks in request to 0 files, no work should be found & an error is raised
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        processingSpec = rerecoWorkload('testProcessingInvalid', rerecoArgs)
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), processingSpec,
                              task)
        # Restore the emulator state for subsequent tests
        Globals.GlobalParams.resetParams()
Exemplo n.º 44
0
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(workload).inputDataset()
     datasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                  inputDataset.tier)
     readers = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in workload.taskIterator():
         # A directive naming the spec's real dataset is accepted...
         Dataset(**self.splitArgs)(workload, task, {datasetPath: []})
         # ...while one naming an unknown dataset raises
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs),
                           workload, task, readers,
                           {datasetPath + '1': []})
Exemplo n.º 45
0
 def testDataDirectiveFromQueue(self):
     """Test data directive from queue"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     inputDataset = getFirstTask(workload).inputDataset()
     datasetPath = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
     readers = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in workload.taskIterator():
         # A directive naming the spec's real dataset is accepted...
         Dataset(**self.splitArgs)(workload, task, {datasetPath: []})
         # ...while one naming an unknown dataset raises
         self.assertRaises(RuntimeError, Dataset(**self.splitArgs), workload, task, readers,
                           {datasetPath + '1': []})
Exemplo n.º 46
0
    def testInvalidSpecs(self):
        """Specs with no work

        Three failure modes of Dataset splitting: missing dataset, fake
        primary dataset name, and a run whitelist matching nothing.
        """
        # no dataset
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset = None
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), processingSpec, task)

        # invalid dataset name
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        getFirstTask(processingSpec).data.input.dataset.primary = Globals.NOT_EXIST_DATASET

        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)

        # invalid run whitelist
        processingSpec = factory.factoryWorkloadConstruction('testProcessingInvalid', rerecoArgs)
        processingSpec.setRunWhitelist([666])  # not in this dataset
        for task in processingSpec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), processingSpec, task)
Exemplo n.º 47
0
 def testLumiSplitTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow split by Lumi"""
     lumiSplit = {'SliceType': 'NumberOfLumis', 'SliceSize': 2}
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     workload = ReRecoWorkloadFactory().factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     workload.setStartPolicy('Dataset', **lumiSplit)
     inputDataset = getFirstTask(workload).inputDataset()
     for task in workload.taskIterator():
         units, _ = Dataset(**lumiSplit)(workload, task)
         # Dataset policy emits a single element; job count follows the lumi slice.
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(2428, unit['Jobs'])
Exemplo n.º 48
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files.

        With every block of the dataset emulated as empty, the Block start
        policy must refuse to create work queue elements rather than emit
        empty ones.
        """
        # Force the data-service emulator to report zero files per block.
        Globals.GlobalParams.setNumOfFilesPerBlock(0)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setRunWhitelist([2, 3])
        # The original also built inputDataset/dataset/dbs locals here, but
        # none of them were used by the assertions below; removed.
        for task in Tier1ReRecoWorkload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(**self.splitArgs),
                              Tier1ReRecoWorkload, task)
Exemplo n.º 49
0
    def atestLumiMask(self):
        """Lumi mask test.

        NOTE(review): the leading 'a' keeps this method from matching
        unittest's default 'test*' discovery pattern, so this test is
        effectively disabled.
        """
        # Private copy of the shared ReReco arguments so the mutation below
        # does not leak into other tests.
        rerecoArgs2 = {}
        rerecoArgs2.update(rerecoArgs)
        rerecoArgs2["ConfigCacheID"] = createConfig(rerecoArgs2["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        # NOTE(review): dataset and dbs are built but never used below.
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}

        # Restrict the task to run 1, lumi 1 via the task-level lumi mask.
        # (Original comment said "Block blacklist" but this is a lumi mask.)
        lumiWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs2)
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['1']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork = Block(**self.splitArgs)(lumiWorkload, task)
        # Exactly one block contains the masked lumi; one block is rejected.
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 1)
Exemplo n.º 50
0
    def testIgnore0SizeBlocks(self):
        """Ignore blocks with 0 files"""
        # Make every emulated block report zero files.
        Globals.GlobalParams.setNumOfFilesPerBlock(0)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workload = ReRecoWorkloadFactory().factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        workload.setRunWhitelist([2, 3])
        inputDataset = getFirstTask(workload).inputDataset()

        # Empty blocks must not yield work queue elements.
        for task in workload.taskIterator():
            policy = Block(**self.splitArgs)
            self.assertRaises(WorkQueueNoWorkError, policy, workload, task)
Exemplo n.º 51
0
    def testRunWhitelist(self):
        """
        ReReco lumi split with Run whitelist
        This test may not do much of anything anymore since listRunLumis is not in DBS3
        """

        # One-lumi slices, so element job counts track lumi counts directly.
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=1)

        Tier1ReRecoWorkload = rerecoWorkload(
            'ReRecoWorkload',
            rerecoArgs,
            assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.setStartPolicy('Block', **splitArgs)
        # Only two of the dataset's runs are whitelisted.
        Tier1ReRecoWorkload.setRunWhitelist([180899, 180992])
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()

        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            units, rejectedWork, badWork = Block(**splitArgs)(
                Tier1ReRecoWorkload, task)
            # Blocks 1 and 2 match run distribution
            self.assertEqual(2, len(units))
            self.assertEqual(len(rejectedWork), 45)
            self.assertEqual(len(badWork), 0)
            # Check number of jobs in element match number for
            # dataset in run whitelist
            wq_jobs = 0
            for unit in units:
                wq_jobs += unit['Jobs']
                # This fails. listRunLumis does not work correctly with DBS3,
                # returning None for the # of lumis in a run
                # NOTE(review): .keys()[0] is Python 2 only; dict views are
                # not indexable under Python 3.
                runLumis = dbs[inputDataset.dbsurl].listRunLumis(
                    block=unit['Inputs'].keys()[0])
                for run in runLumis:
                    if run in getFirstTask(
                            Tier1ReRecoWorkload).inputRunWhitelist():
                        # This is what it is with DBS3 unless we calculate it
                        self.assertEqual(runLumis[run], None)
            self.assertEqual(2, int(wq_jobs))
Exemplo n.º 52
0
    def testContinuousSplittingSupport(self):
        """Can modify successfully policies for continuous splitting"""
        policyInstance = Block(**self.splitArgs)
        # The Block policy must advertise support for adding work to an
        # already-acquired element.
        self.assertTrue(policyInstance.supportsWorkAddition(),
                        "Block instance should support continuous splitting")
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()
        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        for task in Tier1ReRecoWorkload.taskIterator():
            # First pass: one work queue element per block (47 expected).
            units, _ = policyInstance(Tier1ReRecoWorkload, task)
            self.assertEqual(47, len(units))
            blocks = []  # fill with blocks as we get work units for them
            inputs = {}
            for unit in units:
                blocks.extend(unit['Inputs'].keys())
                inputs.update(unit['Inputs'])
                # Priority is inherited from the request set above.
                self.assertEqual(69, unit['Priority'])
                self.assertTrue(1 <= unit['Jobs'])
                self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
                self.assertEqual(task, unit['Task'])
                self.assertTrue(1 <= unit['NumberOfLumis'])
                self.assertTrue(1 <= unit['NumberOfFiles'])
                self.assertTrue(0 <= unit['NumberOfEvents'])
            # Element count must match the number of blocks DBS reports.
            self.assertEqual(
                len(units),
                len(dbs[inputDataset.dbsurl].getFileBlocksInfo(dataset)))

        # Modify the spec and task, get first a fresh policy instance
        # With every block marked processed, re-splitting must find no work.
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition(
                {'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance,
                              Tier1ReRecoWorkload, task)

        # Run one last time
        # NOTE(review): this repeats the previous check verbatim — presumably
        # to prove the no-work result is stable; confirm before removing.
        policyInstance = Block(**self.splitArgs)
        for task in Tier1ReRecoWorkload.taskIterator():
            policyInstance.modifyPolicyForWorkAddition(
                {'ProcessedInputs': inputs.keys()})
            self.assertRaises(WorkQueueNoWorkError, policyInstance,
                              Tier1ReRecoWorkload, task)

        return
Exemplo n.º 53
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        # A spec without an input dataset is malformed.
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid',
                                                   rerecoArgs)
        getFirstTask(spec).data.input.dataset = None
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Block(), spec, task)

        # An unusable DBS url surfaces as a DBS reader failure.
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid',
                                                   rerecoArgs)
        getFirstTask(spec).data.input.dataset.dbsurl = 'wrongprot://dbs.example.com'
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # A dataset unknown to DBS also fails in the reader.
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid',
                                                   rerecoArgs)
        getFirstTask(spec).data.input.dataset.name = "/MinimumBias/FAKE-Filter-v1/RECO"
        for task in spec.taskIterator():
            self.assertRaises(DBSReaderError, Block(), spec, task)

        # A run whitelist matching nothing in the dataset yields no work.
        spec = factory.factoryWorkloadConstruction('testProcessingInvalid',
                                                   rerecoArgs)
        spec.setRunWhitelist([666])  # not in this dataset
        for task in spec.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Block(), spec, task)
Exemplo n.º 54
0
    def testInvalidSpecs(self):
        """Specs with no work"""
        dqmHarvArgs = getRequestArgs()
        dqmHarvArgs["DQMConfigCacheID"] = createConfig(
            dqmHarvArgs["CouchDBName"])
        factory = DQMHarvestWorkloadFactory()
        sites = ["T2_XX_SiteA", "T2_XX_SiteB", "T2_XX_SiteC"]

        # No input dataset: the spec is malformed.
        workload = factory.factoryWorkloadConstruction('NoInputDatasetTest',
                                                       dqmHarvArgs)
        workload.setSiteWhitelist(list(sites))
        getFirstTask(workload).data.input.dataset = None
        for task in workload.taskIterator():
            self.assertRaises(WorkQueueWMSpecError, Dataset(), workload, task)

        # Dataset name unknown to DBS: fails in the reader.
        workload = factory.factoryWorkloadConstruction(
            'InvalidInputDatasetTest', dqmHarvArgs)
        workload.setSiteWhitelist(list(sites))
        getFirstTask(workload).data.input.dataset.name = '/MinimumBias/FAKE-Filter-v1/RECO'
        for task in workload.taskIterator():
            self.assertRaises(DBSReaderError, Dataset(), workload, task)

        # Run whitelist with no overlap with the dataset: no work.
        workload = factory.factoryWorkloadConstruction('InvalidRunNumberTest',
                                                       dqmHarvArgs)
        workload.setSiteWhitelist(list(sites))
        workload.setRunWhitelist([666])  # not in this dataset
        for task in workload.taskIterator():
            self.assertRaises(WorkQueueNoWorkError, Dataset(), workload, task)
Exemplo n.º 55
0
    def testLumiSplitTier1ReRecoWorkload(self):
        """Tier1 Re-reco workflow split by Lumi.

        The Dataset start policy with NumberOfLumis slicing must produce a
        single element for the whole dataset with the expected job count.
        """
        splitArgs = dict(SliceType='NumberOfLumis', SliceSize=2)

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.setStartPolicy('Dataset', **splitArgs)
        # The original also built inputDataset/dataset/dbs locals here, but
        # none of them were used by the assertions below; removed.
        for task in Tier1ReRecoWorkload.taskIterator():
            units = Dataset(**splitArgs)(Tier1ReRecoWorkload, task)
            self.assertEqual(1, len(units))
            for unit in units:
                self.assertEqual(4, unit['Jobs'])
Exemplo n.º 56
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     workload = rerecoWorkload('ReRecoWorkload', rerecoArgs)
     workload.setStartPolicy('Dataset', **self.splitArgs)
     inDs = getFirstTask(workload).inputDataset()
     dsPath = "/%s/%s/%s" % (inDs.primary, inDs.processed,
                             inDs.tier)
     dbs = {inDs.dbsurl: DBSReader(inDs.dbsurl)}
     for task in workload.taskIterator():
         units = Dataset(**self.splitArgs)(workload, task)
         # One element covering the full dataset, with the expected totals.
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(4, unit['Jobs'])
             self.assertEqual(workload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dsPath])
             self.assertEqual(40, unit['NumberOfLumis'])
             self.assertEqual(20, unit['NumberOfFiles'])
             self.assertEqual(20000, unit['NumberOfEvents'])
Exemplo n.º 57
0
    def testLumiMask(self):
        """Lumi mask test"""
        # Private copy of the shared arguments so mutations stay local.
        maskArgs = {}
        maskArgs.update(rerecoArgs)
        maskArgs["ConfigCacheID"] = createConfig(maskArgs["CouchDBName"])
        dummyWorkload = rerecoWorkload('ReRecoWorkload', maskArgs,
                                       assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})

        # Mask the workload down to run 180992, lumi 1.
        lumiWorkload = rerecoWorkload('ReRecoWorkload', maskArgs,
                                      assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        task = getFirstTask(lumiWorkload)
        task.data.input.splitting.runs = ['180992']
        task.data.input.splitting.lumis = ['1,1']
        units, rejectedWork, badWork = Block(**self.splitArgs)(lumiWorkload, task)
        # Exactly one block survives the mask; the other 46 are rejected.
        self.assertEqual(len(units), 1)
        self.assertEqual(len(rejectedWork), 46)
        self.assertEqual(len(badWork), 0)
Exemplo n.º 58
0
    def testBasicProductionWorkload(self):
        """Basic Production Workload"""
        # change split defaults for this test
        totalevents = 1000000
        splitArgs = dict(SliceType='NumberOfEvents',
                         SliceSize=100,
                         MaxJobsPerElement=5)
        mcArgs["EventsPerJob"] = 100
        BasicProductionWorkload = monteCarloWorkload('MonteCarloWorkload',
                                                     mcArgs)
        getFirstTask(BasicProductionWorkload).setSiteWhitelist(
            ['T2_XX_SiteA', 'T2_XX_SiteB'])
        getFirstTask(BasicProductionWorkload).addProduction(
            totalEvents=totalevents)
        # NOTE(review): this site whitelist call repeats the one above.
        getFirstTask(BasicProductionWorkload).setSiteWhitelist(
            ['T2_XX_SiteA', 'T2_XX_SiteB'])
        for task in BasicProductionWorkload.taskIterator():
            units, _, _ = MonteCarlo(**splitArgs)(BasicProductionWorkload,
                                                  task)

            SliceSize = BasicProductionWorkload.startPolicyParameters(
            )['SliceSize']
            # Each element holds at most MaxJobsPerElement jobs of SliceSize
            # events, so the element count is ceil(total / per-element cap).
            self.assertEqual(
                math.ceil(totalevents /
                          (SliceSize * splitArgs['MaxJobsPerElement'])),
                len(units))
            first_event = 1
            first_lumi = 1
            first_run = 1
            # Walk the elements and verify contiguous, non-overlapping
            # event/lumi masks (one lumi per job).
            for unit in units:
                self.assertTrue(unit['Jobs'] <= splitArgs['MaxJobsPerElement'])
                self.assertEqual(unit['WMSpec'], BasicProductionWorkload)
                self.assertEqual(unit['Task'], task)
                self.assertEqual(unit['Mask']['FirstEvent'], first_event)
                self.assertEqual(unit['Mask']['FirstLumi'], first_lumi)
                last_event = first_event + (SliceSize * unit['Jobs']) - 1
                if last_event > totalevents:
                    # this should be the last unit of work
                    last_event = totalevents
                last_lumi = first_lumi + unit['Jobs'] - 1
                self.assertEqual(unit['Mask']['LastEvent'], last_event)
                self.assertEqual(unit['Mask']['LastLumi'], last_lumi)
                self.assertEqual(unit['Mask']['FirstRun'], first_run)
                self.assertEqual(last_lumi - first_lumi + 1,
                                 unit['NumberOfLumis'])
                self.assertEqual(last_event - first_event + 1,
                                 unit['NumberOfEvents'])
                first_event = last_event + 1
                first_lumi += unit['Jobs']  # one lumi per job
            # The final element must end exactly at the requested total.
            self.assertEqual(unit['Mask']['LastEvent'], totalevents)
Exemplo n.º 59
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Check that getMaskedBlocks is returning the correct information
        """
        # Emulated input: 3 runs per file, 5 lumis per block.
        Globals.GlobalParams.setNumOfRunsPerFile(3)
        Globals.GlobalParams.setNumOfLumisPerBlock(5)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        factory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = factory.factoryWorkloadConstruction(
            'ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        # Task-level run/lumi mask that getMaskedBlocks should apply.
        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        lumiMask = LumiList(
            compactList={
                '166921': [[40, 70]],
                '166429': [[1, 50]],
                '166911': [[1, 5], [16, 20]],
            })
        # Lumis the emulated dataset actually contains.
        inputLumis = LumiList(compactList={
            '166921': [[1, 67]],
            '166429': [[1, 91]],
            '166911': [[1, 104]],
        })
        dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed,
                                 inputDataset.tier)
        dbs = DBSReader(inputDataset.dbsurl)
        maskedBlocks = Block(**self.splitArgs).getMaskedBlocks(
            task, dbs, dataset)
        # Each file's masked lumi list must equal (available lumis & mask).
        # Loop variables renamed so they no longer shadow the 'file' builtin.
        for blockName, blockFiles in maskedBlocks.items():
            for fileName, lumiList in blockFiles.items():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Exemplo n.º 60
0
 def testTier1ReRecoWorkload(self):
     """Tier1 Re-reco workflow"""
     rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
     factory = ReRecoWorkloadFactory()
     Tier1ReRecoWorkload = factory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
     Tier1ReRecoWorkload.setStartPolicy('Dataset', **self.splitArgs)
     inputDataset = getFirstTask(Tier1ReRecoWorkload).inputDataset()
     dataset = "/%s/%s/%s" % (inputDataset.primary, inputDataset.processed, inputDataset.tier)
     dummyDBS = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
     for task in Tier1ReRecoWorkload.taskIterator():
         units, _ = Dataset(**self.splitArgs)(Tier1ReRecoWorkload, task)
         self.assertEqual(1, len(units))
         for unit in units:
             self.assertEqual(15, unit['Jobs'])
             self.assertEqual(Tier1ReRecoWorkload, unit['WMSpec'])
             self.assertEqual(task, unit['Task'])
             self.assertEqual(unit['Inputs'].keys(), [dataset])
             self.assertEqual(4855, unit['NumberOfLumis'])
             self.assertEqual(72, unit['NumberOfFiles'])
             self.assertEqual(743201, unit['NumberOfEvents'])