Пример #1
0
    def testGetMaskedBlocks(self):
        """
        _testGetMaskedBlocks_

        Build a ReReco workload with a run/lumi mask on its first task and
        verify that every LumiList returned by getMaskedBlocks equals the
        intersection of the expected input lumis and the mask.
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workloadFactory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = workloadFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)

        # Point the task at the dataset this test inspects
        inputDataset = task.inputDataset()
        inputDataset.primary = 'SingleElectron'
        inputDataset.processed = 'StoreResults-Run2011A-WElectron-PromptSkim-v4-ALCARECO-NOLC-36cfce5a1d3f3ab4df5bd2aa0a4fa380'
        inputDataset.tier = 'USER'

        # Run/lumi mask as stored on the task ...
        task.data.input.splitting.runs = [166921, 166429, 166911]
        task.data.input.splitting.lumis = ['40,70', '1,50', '1,5,16,20']
        # ... and the same mask, plus the expected input lumis, as LumiLists
        maskRanges = {'166921': [[40, 70]], '166429': [[1, 50]], '166911': [[1, 5], [16, 20]]}
        inputRanges = {'166921': [[1, 67]], '166429': [[1, 91]], '166911': [[1, 104]]}
        lumiMask = LumiList(compactList=maskRanges)
        inputLumis = LumiList(compactList=inputRanges)

        datasetParts = (inputDataset.primary, inputDataset.processed, inputDataset.tier)
        dataset = "/%s/%s/%s" % datasetParts
        dbs = DBSReader(inputDataset.dbsurl)
        splitter = Block(**self.splitArgs)
        maskedBlocks = splitter.getMaskedBlocks(task, dbs, dataset)

        # Block and file names are irrelevant here; only the lumi content is checked
        for fileMap in maskedBlocks.itervalues():
            for lumiList in fileMap.itervalues():
                self.assertEqual(str(lumiList), str(inputLumis & lumiMask))
Пример #2
0
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Collect, per block and per file, the lumis that survive the task's lumi mask.

            Every block of the dataset is scanned (files are requested with
            validFileOnly=1); each file's lumis are intersected with the task
            mask and only non-empty intersections are kept. The result is
            shaped like:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """
        result = {}
        # The task mask is used as a LumiList so the set algebra below stays simple
        mask = task.getLumiMask()

        # For performance reasons all block names are fetched first
        blockNames = [entry['block_name'] for entry in dbs.dbs.listBlocks(dataset=datasetPath)]

        for blockName in blockNames:
            for record in dbs.dbs.listFileLumis(block_name=blockName, validFileOnly=1):
                lfn = record['logical_file_name']
                runKey = str(record['run_num'])
                fileMask = LumiList(runsAndLumis={runKey: record['lumi_section_num']})
                overlap = mask & fileMask
                if not overlap:
                    # Nothing from this file passes the mask
                    continue
                perFile = result.setdefault(blockName, {})
                perFile.setdefault(lfn, LumiList())
                perFile[lfn] += overlap

        return result
Пример #3
0
def getLumiList(lumi_mask_name, logger=None):
    """
    Build a LumiList object from a lumi-mask location.

    lumi_mask_name: either an http(s) address or a json file on disk.
    logger: optional logger; when given, the source being read is reported
            at debug level.

    Raises ConfigurationException if the download fails with an HTTP error
    or if reading the local file raises IOError.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]

    if scheme not in ['http', 'https']:
        # Anything that is not a web address is treated as a local file
        if logger:
            logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
        try:
            return LumiList(filename=lumi_mask_name)
        except IOError as err:
            message = "Problem loading lumi-mask file; %s" % str(err)
            raise ConfigurationException(message)

    if logger:
        logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
    try:
        return LumiList(url=lumi_mask_name)
    except urllib2.HTTPError as err:
        message = "Problem downloading lumi-mask file; %s %s" % (err.code, err.msg)
        raise ConfigurationException(message)
Пример #4
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Split a ReReco workload whose first task carries a run/lumi mask and
        compare the number of lumis summed over the work units with the
        number of lumis in the reference mask.
        """

        Tier1ReRecoWorkload = rerecoWorkload('ReRecoWorkload', rerecoArgs,
                                             assignArgs={'SiteWhitelist': ['T2_XX_SiteA']})
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        # Mask stored on the task, followed by the reference LumiList
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        referenceRanges = {'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]}
        lumiMask = LumiList(compactList=referenceRanges)

        splitter = Block(**self.splitArgs)
        units, dummyRejectedWork = splitter(Tier1ReRecoWorkload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Пример #5
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Split a ReReco workload whose first task carries a run/lumi mask and
        compare the number of lumis summed over the work units with the
        number of lumis in the reference mask.
        """

        Globals.GlobalParams.setNumOfRunsPerFile(3)
        Globals.GlobalParams.setNumOfLumisPerBlock(5)
        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workloadFactory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = workloadFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        inputDataset = task.inputDataset()

        # Mask stored on the task, followed by the reference LumiList
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        referenceRanges = {'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]}
        lumiMask = LumiList(compactList=referenceRanges)

        # dataset and dbs are built as before even though the splitter call
        # below does not take them
        datasetParts = (inputDataset.primary, inputDataset.processed, inputDataset.tier)
        dataset = "/%s/%s/%s" % datasetParts
        dbs = {inputDataset.dbsurl: DBSReader(inputDataset.dbsurl)}
        splitter = Block(**self.splitArgs)
        units, rejectedWork = splitter(Tier1ReRecoWorkload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Пример #6
0
    def testFilter(self):
        """
        Test filtering of a list of lumis

        A LumiList is built from runsAndLumis and three (run, lumi) lists are
        passed through filterLumis: a superset of the constructed list, a
        subset of it, and one that only partially overlaps.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }

        completeList = zip([1]*150, range(1, 150)) + \
                       zip([2]*150, range(1, 150)) + \
                       zip([3]*150, range(1, 150))

        smallList    = zip([1]*50,  range(1, 10)) + zip([2]*50, range(50, 70))
        overlapList  = zip([1]*150, range(30, 40)) + \
                       zip([2]*150, range(60, 80))
        overlapRes   = zip([1]*9,   range(30, 34)) + [(1, 35)] + \
                       zip([1]*9,   range(37, 40)) + \
                       zip([2]*30,  range(60, 76)) + \
                       zip([2]*9,   range(77, 80))

        runLister = LumiList(runsAndLumis = runsAndLumis)

        # Test a list to be filtered which is a superset of constructed list
        filterComplete = runLister.filterLumis(completeList)
        # Test a list to be filtered which is a subset of constructed list
        filterSmall    = runLister.filterLumis(smallList)
        # Test a list to be filtered which is neither
        filterOverlap  = runLister.filterLumis(overlapList)

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails
        self.assertEqual(filterComplete, runLister.getLumis())
        self.assertEqual(filterSmall, smallList)
        self.assertEqual(filterOverlap, overlapRes)
Пример #7
0
    def testOr(self):
        """
        a|b for lots of cases

        Checks that the union matches the expected result, that it is
        commutative, that ``+`` gives the same result as ``|``, and that
        building a LumiList from a list of dictionaries matches adding the
        individual LumiLists.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        clumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                 }
        result = {'1' : range(2,20) + range(31,39) + range(45,49) + range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(6,20) + range (30,40) + range(10,35),
                  '3' : range(10,20) + range (30,40) + range(50,60) + range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        # c must come from clumis so that the "hard" path below adds the same
        # three inputs that the "easy" path receives
        c = LumiList(runsAndLumis = clumis)
        r = LumiList(runsAndLumis = result)
        self.assertEqual((a|b).getCMSSWString(), r.getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (b|a).getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (a+b).getCMSSWString())

        # Test list construction (faster)

        multiple = [alumis, blumis, clumis]
        easy = LumiList(runsAndLumis = multiple)
        hard = a + b
        hard += c
        self.assertEqual(hard.getCMSSWString(), easy.getCMSSWString())
Пример #8
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Split a ReReco workload whose first task carries a run/lumi mask and
        compare the number of lumis summed over the work units with the
        number of lumis in the reference mask.
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workloadFactory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = workloadFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        # Mask stored on the task, followed by the reference LumiList
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        referenceRanges = {'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]}
        lumiMask = LumiList(compactList=referenceRanges)

        splitter = Block(**self.splitArgs)
        units, dummyRejectedWork = splitter(Tier1ReRecoWorkload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Пример #9
0
def fast_getDoubleLumis(lumisDict):
    """
    Return, as a compact list, the (run, lumi) pairs that appear more than
    once in a run's lumi sequence.

    lumisDict maps each run to an iterable of its lumis.
    """
    repeated = set()
    for run, lumis in lumisDict.iteritems():
        alreadySeen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in alreadySeen:
                # Second (or later) occurrence within this run
                repeated.add(pair)
            else:
                alreadySeen.add(pair)
    return LumiList(lumis=repeated).getCompactList()
Пример #10
0
 def addJobs(self, jobs):
     """
     Record per-job statistics for ``jobs`` according to ``self.algo``.

     'FileBased' sums the 'lumiCount' (0 when absent) and 'events' of each
     job's input files and also counts the files; 'EventBased' takes the
     differences of the Last/First lumi and event entries of the job mask;
     any other algorithm counts the lumis of the mask's 'runAndLumis' and
     multiplies that count by the mean 'avgEvtsPerLumi' of the input files.
     """
     if self.algo == 'FileBased':
         lumiTotals = [
             sum([f.get('lumiCount', 0) for f in job['input_files']])
             for job in jobs
         ]
         self.lumisPerJob += lumiTotals
         eventTotals = [
             sum([f['events'] for f in job['input_files']]) for job in jobs
         ]
         self.eventsPerJob += eventTotals
         self.filesPerJob += [len(job['input_files']) for job in jobs]
         return

     if self.algo == 'EventBased':
         lumiSpans = [
             job['mask']['LastLumi'] - job['mask']['FirstLumi']
             for job in jobs
         ]
         self.lumisPerJob += lumiSpans
         eventSpans = [
             job['mask']['LastEvent'] - job['mask']['FirstEvent']
             for job in jobs
         ]
         self.eventsPerJob += eventSpans
         return

     for job in jobs:
         inputFiles = job['input_files']
         perLumiRates = [f['avgEvtsPerLumi'] for f in inputFiles]
         avgEventsPerLumi = sum(perLumiRates) / float(len(inputFiles))
         jobMask = LumiList(compactList=job['mask']['runAndLumis'])
         nLumis = len(jobMask.getLumis())
         self.lumisPerJob.append(nLumis)
         self.eventsPerJob.append(avgEventsPerLumi * nLumis)
Пример #11
0
def makeLumiList(lumiString):
    """
    Parse a JSON-encoded compact lumi list and return it as produced by
    LumiList.getCompactList().

    :param lumiString: JSON text passed to json.loads
    :return: the compact list of the LumiList built from the parsed value
    :raises WMWorkloadToolsException: if parsing or LumiList construction fails
    """
    try:
        compactList = json.loads(lumiString)
        ll = LumiList(compactList = compactList)
        return ll.getCompactList()
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not reported as a parse failure.
        raise WMWorkloadToolsException("Could not parse LumiList")
Пример #12
0
    def testAnd(self):
        """
        a&b for lots of cases

        Checks the intersection against the expected result, that it is
        commutative, and that the union differs from the expected
        intersection.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                  '4' : range(1,100),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,25) + range(25,40) + range(40,50) + range(33,36),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                  '5' : range(1,100),
                 }
        result = {'1' : range(2,6) + range(12,13) + range(16,20) + range(31,39) + range(45,49),
                  '2' : range(10,20) + range(30,35),
                  '3' : range(10,15) + range(35,40) + range(50,51)+ range(59,60),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        r = LumiList(runsAndLumis = result)
        # assertEqual/assertNotEqual report both strings when a check fails
        self.assertEqual((a&b).getCMSSWString(), r.getCMSSWString())
        self.assertEqual((a&b).getCMSSWString(), (b&a).getCMSSWString())
        self.assertNotEqual((a|b).getCMSSWString(), r.getCMSSWString())
Пример #13
0
    def testWithMaskedBlocks(self):
        """
        _testWithMaskedBlocks_

        Split a ReReco workload whose first task carries a run/lumi mask and
        compare the number of lumis summed over the work units with the
        number of lumis in the reference mask.
        """

        rerecoArgs["ConfigCacheID"] = createConfig(rerecoArgs["CouchDBName"])
        workloadFactory = ReRecoWorkloadFactory()

        Tier1ReRecoWorkload = workloadFactory.factoryWorkloadConstruction('ReRecoWorkload', rerecoArgs)
        Tier1ReRecoWorkload.data.request.priority = 69
        task = getFirstTask(Tier1ReRecoWorkload)
        dummyDataset = task.inputDataset()

        # Mask stored on the task, followed by the reference LumiList
        task.data.input.splitting.runs = [181061, 180899]
        task.data.input.splitting.lumis = ['1,50,60,70', '1,1']
        referenceRanges = {'206371': [[1, 50], [60, 70]], '180899': [[1, 1]]}
        lumiMask = LumiList(compactList=referenceRanges)

        splitter = Block(**self.splitArgs)
        units, dummyRejectedWork = splitter(Tier1ReRecoWorkload, task)

        nLumis = sum(unit['NumberOfLumis'] for unit in units)

        self.assertEqual(len(lumiMask.getLumis()), nLumis)
Пример #14
0
    def mergeLumis(inputdata, lumimask):
        """
        Merge the run/lumi reports of all inputs (called when usedbs=no).

        Returns three compact lists, in this order: the lumis found in the
        reports, the lumis of ``lumimask`` that are not among them, and the
        lumis that were reported more than once.
        """
        seenPairs = set()
        repeatedPairs = set()

        # Record every (run, lumi) pair; a pair met again is also a duplicate
        for reports in inputdata.values():
            for report in reports:
                runLumi = literal_eval(report['runlumi'])
                for run, lumis in runLumi.iteritems():
                    for lumi in lumis:
                        pair = (run, lumi)
                        if pair in seenPairs:
                            repeatedPairs.add(pair)
                        seenPairs.add(pair)

        def groupByRun(pairs):
            # [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
            grouped = {}
            for run, lumi in pairs:
                grouped.setdefault(run, []).append(int(lumi))
            return grouped

        repeatedDict = groupByRun(repeatedPairs)
        seenDict = groupByRun(seenPairs)

        repeated = LumiList(runsAndLumis=repeatedDict)
        merged = LumiList(runsAndLumis=seenDict)

        # Compact lists are produced by LumiList itself
        mergedCompact = merged.getCompactList()
        remainingCompact = (LumiList(compactList=lumimask) - merged).getCompactList()
        return mergedCompact, remainingCompact, repeated.getCompactList()
Пример #15
0
 def getDoubleLumis(lumisDict):
     """
     Return, as a compact list, the lumis counted twice (or more) within a
     run of lumisDict.
     """
     duplicates = set()
     for run, lumis in lumisDict.iteritems():
         visited = set()
         for lumi in lumis:
             key = (run, lumi)
             if key in visited:
                 duplicates.add(key)
             else:
                 visited.add(key)
     return LumiList(lumis=duplicates).getCompactList()
Пример #16
0
 def testWrite(self):
     """Build a four-run LumiList and write it to 'newFile.json'."""
     runLumis = {}
     runLumis['1'] = range(2,20) + range(31,39) + range(45,49)
     runLumis['2'] = range(6,20) + range(30,40)
     runLumis['3'] = range(10,20) + range(30,40) + range(50,60)
     runLumis['4'] = range(1,100)
     lumiList = LumiList(runsAndLumis=runLumis)
     lumiList.writeJSON('newFile.json')
Пример #17
0
def makeLumiList(lumiDict):
    """
    Turn a compact lumi list, given as a dictionary or as its JSON text,
    into the compact list produced by LumiList.getCompactList().

    :param lumiDict: compact list, or a string that JsonWrapper.loads can parse
    :return: the compact list of the resulting LumiList
    :raises WMSpecFactoryException: if the input cannot be parsed or converted
    """
    try:
        if isinstance(lumiDict, basestring):
            lumiDict = JsonWrapper.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not reported as a parse failure.
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" % (type(lumiDict), lumiDict))
Пример #18
0
 def testWrite(self):
     """Build a four-run LumiList and write it to 'newFile.json'."""
     runLumis = {}
     runLumis['1'] = range(2,20) + range(31,39) + range(45,49)
     runLumis['2'] = range(6,20) + range(30,40)
     runLumis['3'] = range(10,20) + range(30,40) + range(50,60)
     runLumis['4'] = range(1,100)
     lumiList = LumiList(runsAndLumis=runLumis)
     lumiList.writeJSON('newFile.json')
Пример #19
0
 def subtractLumis(input, output):
     """
     Compare input and output run/lumi dictionaries (called when usedbs=yes).

     Returns a pair of compact lists: the lumis of ``output``, and the lumis
     of ``input`` that are not in ``output``.
     """
     processed = LumiList(runsAndLumis=output)
     requested = LumiList(runsAndLumis=input)
     remaining = requested - processed
     processedCompact = processed.getCompactList()
     return processedCompact, remaining.getCompactList()
Пример #20
0
 def testWrite(self):
     """Build a four-run LumiList and write it to "newFile.json"."""
     firstRun = range(2, 20) + range(31, 39) + range(45, 49)
     secondRun = range(6, 20) + range(30, 40)
     thirdRun = range(10, 20) + range(30, 40) + range(50, 60)
     fourthRun = range(1, 100)
     runLumis = {"1": firstRun, "2": secondRun, "3": thirdRun, "4": fourthRun}
     LumiList(runsAndLumis=runLumis).writeJSON("newFile.json")
Пример #21
0
    def testDuplicates(self):
        """
        Test a list with lots of duplicates

        The input repeats lumis 5-24 of run 1; getLumis() is expected to
        return each (run, lumi) pair only once.
        """
        # list(range(...)) keeps the concatenation valid on Python 3, where
        # range objects do not support "+"
        uniqueLumis = list(range(1, 34)) + list(range(37, 48))
        result = list(zip([1]*100, uniqueLumis))
        lumis  = list(zip([1]*100, uniqueLumis + list(range(5, 25))))

        lister = LumiList(lumis = lumis)
        # assertEqual reports both lists when the comparison fails
        self.assertEqual(lister.getLumis(), result)
Пример #22
0
 def getDoubleLumis(lumisDict):
     """
     Calculate the lumis counted twice (or more) within a run.

     lumisDict maps each run to the sequence of its lumis. The duplicated
     (run, lumi) pairs are returned as a compact list obtained from
     LumiList.getCompactList().
     """
     doubleLumis = set()
     for run, lumis in lumisDict.iteritems():
         # Track the lumis already met for this run: one pass instead of
         # calling list.count() for every element (quadratic in the run size)
         seen = set()
         for lumi in lumis:
             if lumi in seen:
                 doubleLumis.add((run, lumi))
             else:
                 seen.add(lumi)
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()
Пример #23
0
    def testDuplicates(self):
        """
        Test a list with lots of duplicates

        The input repeats lumis 5-24 of run 1; getLumis() is expected to
        return each (run, lumi) pair only once.
        """
        # list(...) around range/zip gives the same lists on Python 2 and
        # keeps the concatenation and comparison valid on Python 3
        uniqueLumis = list(range(1, 34)) + list(range(37, 48))
        result = list(zip([1]*100, uniqueLumis))
        lumis  = list(zip([1]*100, uniqueLumis + list(range(5, 25))))

        lister = LumiList(lumis = lumis)
        # assertEqual reports both lists when the comparison fails
        self.assertEqual(lister.getLumis(), result)
Пример #24
0
def fast_getDoubleLumis(lumisDict):
    """
    Return, as a compact list, the (run, lumi) pairs that appear more than
    once in a run's lumi sequence.

    lumisDict maps each run to an iterable of its lumis.
    """
    repeated = set()
    for run, lumis in lumisDict.iteritems():
        alreadySeen = set()
        for lumi in lumis:
            pair = (run, lumi)
            if pair in alreadySeen:
                # Second (or later) occurrence within this run
                repeated.add(pair)
            else:
                alreadySeen.add(pair)
    return LumiList(lumis=repeated).getCompactList()
Пример #25
0
def makeLumiList(lumiDict):
    """
    Turn a compact lumi list, given as a dictionary or as its JSON text,
    into the compact list produced by LumiList.getCompactList().

    :param lumiDict: compact list, or a str/bytes value that json.loads can parse
    :return: the compact list of the resulting LumiList
    :raises WMSpecFactoryException: if the input cannot be parsed or converted
    """
    try:
        if isinstance(lumiDict, (str, bytes)):
            lumiDict = json.loads(lumiDict)
        ll = LumiList(compactList=lumiDict)
        return ll.getCompactList()
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not reported as a parse failure.
        raise WMSpecFactoryException("Could not parse LumiList, %s: %s" %
                                     (type(lumiDict), lumiDict))
Пример #26
0
    def removeLumiList(self, lumiList):
        """
        Subtract lumiList from the run/lumi information stored here.

        The stored compact list is converted to a LumiList for the
        subtraction and written back in compact form, which may be
        computationally expensive for a large number of lumis.
        """
        current = LumiList(compactList=self['runAndLumis'])
        remaining = current - lumiList
        self['runAndLumis'] = remaining.getCompactList()
Пример #27
0
    def removeLumiList(self, lumiList):
        """
        Subtract lumiList from the run/lumi information stored here.

        The stored compact list is converted to a LumiList for the
        subtraction and written back in compact form, which may be
        computationally expensive for a large number of lumis.
        """
        current = LumiList(compactList=self['runAndLumis'])
        remaining = current - lumiList
        self['runAndLumis'] = remaining.getCompactList()
Пример #28
0
    def testNull(self):
        """
        Test a null list

        A LumiList built with lumis=None must give an empty CMSSW string,
        an empty lumi list and an empty compact list.
        """

        runLister = LumiList(lumis = None)

        # assertEqual reports the unexpected value when a check fails
        self.assertEqual(runLister.getCMSSWString(), '')
        self.assertEqual(runLister.getLumis(), [])
        self.assertEqual(runLister.getCompactList(), {})
Пример #29
0
    def testNull(self):
        """
        Test a null list

        A LumiList built with lumis=None must give an empty CMSSW string,
        an empty lumi list and an empty compact list.
        """

        runLister = LumiList(lumis = None)

        # assertEqual reports the unexpected value when a check fails
        self.assertEqual(runLister.getCMSSWString(), '')
        self.assertEqual(runLister.getLumis(), [])
        self.assertEqual(runLister.getCompactList(), {})
Пример #30
0
    def testOr(self):
        """
        a|b for lots of cases

        Checks that the union matches the expected result, that it is
        commutative, that ``+`` gives the same result as ``|``, and that
        building a LumiList from a list of dictionaries matches adding the
        individual LumiLists.
        """

        alumis = {'1' : range(2,20) + range(31,39) + range(45,49),
                  '2' : range(6,20) + range (30,40),
                  '3' : range(10,20) + range (30,40) + range(50,60),
                 }
        blumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                  '3' : range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        clumis = {'1' : range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(10,35),
                 }
        result = {'1' : range(2,20) + range(31,39) + range(45,49) + range(1,6) + range(12,13) + range(16,30) + range(40,50) + range(39,80),
                  '2' : range(6,20) + range (30,40) + range(10,35),
                  '3' : range(10,20) + range (30,40) + range(50,60) + range(10,15) + range(35,40) + range(45,51) + range(59,70),
                 }
        a = LumiList(runsAndLumis = alumis)
        b = LumiList(runsAndLumis = blumis)
        # c must come from clumis so that the "hard" path below adds the same
        # three inputs that the "easy" path receives
        c = LumiList(runsAndLumis = clumis)
        r = LumiList(runsAndLumis = result)
        self.assertEqual((a|b).getCMSSWString(), r.getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (b|a).getCMSSWString())
        self.assertEqual((a|b).getCMSSWString(), (a+b).getCMSSWString())

        # Test list construction (faster)

        multiple = [alumis, blumis, clumis]
        easy = LumiList(runsAndLumis = multiple)
        hard = a + b
        hard += c
        self.assertEqual(hard.getCMSSWString(), easy.getCMSSWString())
Пример #31
0
    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.

        :param task: task dictionary; task['tm_split_args']['runs'] and
            task['tm_split_args']['lumis'] are overwritten when completion
            jobs are needed
        :param unprocessed: set of identifiers, intersected with the file
            names found in the missing-lumis directory and with
            self.failedJobs
        :return: False when neither intersection has entries, True otherwise
        """
        missingDir = "automatic_splitting/missing_lumis/" #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            # The directory cannot be listed: nothing is available from it
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if not available and not failed:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s", missingFile)
                missing = missing + LumiList(compactList=literal_eval(fd.read()))
        for failedId in failed:
            # Create the scratch directory before entering the try block, so
            # the cleanup in "finally" never sees an unbound tmpdir (or the
            # already removed one of a previous iteration) if mkdtemp fails.
            tmpdir = tempfile.mkdtemp()
            f = None
            try:
                f = tarfile.open("run_and_lumis.tar.gz")
                fn = "job_lumis_{0}.json".format(failedId)
                f.extract(fn, path=tmpdir)
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                    missing = missing + LumiList(compactList=injson)
                    self.logger.info("Adding lumis from failed job %s", failedId)
            finally:
                if f:
                    f.close()
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [",".join(str(l) for l in functools.reduce(lambda x, y:x + y, missing_compact[run])) for run in runs]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True
Пример #32
0
    def adjust(self, parameters, inputs, outputs, se):
        """
        Fill in the file and lumi information of a job.

        When self._local is set and se.transfer_inputs() is true, an entry
        (se.local(path), basename, False) is appended to ``inputs`` for every
        non-empty path in self._files.  When se.transfer_outputs() is true,
        (se.local(remote), basename of the local name) is appended to
        ``outputs`` for every pair in self.outputs.  ``parameters`` receives
        the input files, the outputs and, unless the task is file based, the
        compact list of the (run, lumi) pairs in self._units.
        """
        if self._local and se.transfer_inputs():
            inputs += [(se.local(path), os.path.basename(path), False)
                       for _, path in self._files if path]
        if se.transfer_outputs():
            outputs += [(se.local(remote), os.path.basename(localName))
                        for localName, remote in self.outputs]

        parameters['mask']['files'] = self.input_files
        parameters['output files'] = self.outputs
        if self._file_based:
            return

        runLumiPairs = set((run, lumi) for (_, _, run, lumi) in self._units)
        parameters['mask']['lumis'] = LumiList(lumis=runLumiPairs).getCompactList()
Пример #33
0
 def addJobs(self, jobs):
     """
     Record per-job statistics for ``jobs`` according to ``self.algo``.

     'FileBased' sums the 'lumiCount' (0 when absent) and 'events' of each
     job's input files; 'EventBased' takes the differences of the Last/First
     lumi and event entries of the job mask; any other algorithm counts the
     lumis of the mask's 'runAndLumis' and multiplies that count by the mean
     'avgEvtsPerLumi' of the input files.
     """
     if self.algo == 'FileBased':
         lumiTotals = [sum([f.get('lumiCount', 0) for f in job['input_files']]) for job in jobs]
         self.lumisPerJob += lumiTotals
         eventTotals = [sum([f['events'] for f in job['input_files']]) for job in jobs]
         self.eventsPerJob += eventTotals
         return

     if self.algo == 'EventBased':
         lumiSpans = [job['mask']['LastLumi'] - job['mask']['FirstLumi'] for job in jobs]
         self.lumisPerJob += lumiSpans
         eventSpans = [job['mask']['LastEvent'] - job['mask']['FirstEvent'] for job in jobs]
         self.eventsPerJob += eventSpans
         return

     for job in jobs:
         inputFiles = job['input_files']
         perLumiRates = [f['avgEvtsPerLumi'] for f in inputFiles]
         avgEventsPerLumi = sum(perLumiRates)/float(len(inputFiles))
         jobMask = LumiList(compactList=job['mask']['runAndLumis'])
         nLumis = len(jobMask.getLumis())
         self.lumisPerJob.append(nLumis)
         self.eventsPerJob.append(avgEventsPerLumi * nLumis)
Пример #34
0
 def mergeLumis(inputdata):
     """
     Merge the run/lumi reports of all inputs and return the compact list of
     the lumis they contain.
     """
     pairs = set()
     # Gather the lumis reported by every single file
     for reports in inputdata.values():
         for report in reports:
             runLumi = literal_eval(report['runlumi'])
             for run, lumis in runLumi.iteritems():
                 # lumi is str, but need int
                 pairs.update((run, int(lumi)) for lumi in lumis)
     return LumiList(lumis=pairs).getCompactList()
Пример #35
0
 def mergeLumis(inputdata):
     """
     Merge the run/lumi reports of all inputs and return the compact list of
     the lumis they contain.
     """
     pairs = set()
     # Gather the lumis reported by every single file
     for reports in inputdata.values():
         for report in reports:
             runLumi = literal_eval(report['runlumi'])
             for run, lumis in runLumi.iteritems():
                 # lumi is str, but need int
                 pairs.update((run, int(lumi)) for lumi in lumis)
     return LumiList(lumis=pairs).getCompactList()
Пример #36
0
    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Collect, per block and per file, the lumis that survive the task's lumi mask.

            Works in two passes: first the files matching each run of the mask
            are looked up (lumis are passed to DBS in the slices produced by
            Lexicon.slicedIterator(lumis, 50)) and registered with an empty
            LumiList; then the lumis of every file in the registered blocks
            are intersected with the mask and accumulated. The result is
            shaped like:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """

        maskedBlocks = {}
        # The mask is kept both in its raw form and as a LumiList for the algebra below
        lumiMask = task.getLumiMask()
        taskMask = LumiList(compactList=lumiMask)

        # Pass 1: register the block/lfn of every file holding runs and lumis of interest
        for run, lumis in lumiMask.items():
            matchingFiles = []
            for lumiSlice in Lexicon.slicedIterator(lumis, 50):
                matchingFiles.extend(dbs.dbs.listFileArray(dataset=datasetPath,
                                                           run_num=run,
                                                           lumi_list=lumiSlice,
                                                           detail=True))
            for fileInfo in matchingFiles:
                blockName = fileInfo['block_name']
                fileName = fileInfo['logical_file_name']
                blockFiles = maskedBlocks.setdefault(blockName, {})
                if fileName not in blockFiles:
                    blockFiles[fileName] = LumiList()

        # Pass 2: fill in the masked lumis of the registered files
        for block, blockFiles in maskedBlocks.items():
            for fileLumi in dbs.dbs.listFileLumis(block_name=block, validFileOnly=1):
                lfn = fileLumi['logical_file_name']
                if blockFiles.get(lfn) is None:
                    # File was not registered in pass 1
                    continue
                # Lumis of this file for this run, restricted to the needed ones
                fileLumis = LumiList(runsAndLumis={fileLumi['run_num']: fileLumi['lumi_section_num']})
                blockFiles[lfn] += (fileLumis & taskMask)

        return maskedBlocks
Пример #37
0
    def testRuns(self):
        """
        Test constructing from run and list of lumis

        LumiLists built from integer run keys, from string run keys and from
        the file 'lumiTest.json' must agree; one built from runs with empty
        lumi lists must have length 0.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }
        runsAndLumis2 = {
            '1': range(1, 34) + [35] + range(37, 48),
            '2': range(49, 76) + range(77, 131) + range(133, 137)
        }
        blank = {
            '1': [],
            '2': []
        }

        jsonLister = LumiList(filename = 'lumiTest.json')
        jsonString = jsonLister.getCMSSWString()
        jsonList   = jsonLister.getCompactList()

        runLister = LumiList(runsAndLumis = runsAndLumis)
        runString = runLister.getCMSSWString()
        runList   = runLister.getCompactList()

        runLister2 = LumiList(runsAndLumis = runsAndLumis2)
        runList2 = runLister2.getCompactList()

        runLister3 = LumiList(runsAndLumis = blank)

        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the comparison fails
        self.assertEqual(jsonString, runString)
        self.assertEqual(jsonList, runList)
        self.assertEqual(runList2, runList)
        self.assertEqual(len(runLister3), 0)
Пример #38
0
    def makeNewJobByWork(self, reason='', failedJob=False):
        """
        Create a new job from the events, lumis and files accumulated so far.

        :param reason: why the job is being created; it is logged and, for a
                       failed job, passed on as the failure reason
        :param failedJob: when true the job is created as already failed

        :return: nothing
        """

        jobEvents = self.eventsInJob
        jobLumis = self.jobLumis
        jobFiles = self.jobFiles

        self.maxLumis = max(self.maxLumis, len(jobLumis))

        # Compact form of the lumis, used for logging and for the job mask
        compactLumis = LumiList(lumis=jobLumis).getCompactList()
        lfns = [f['lfn'] for f in jobFiles]
        logging.debug(
            "Because %s new job with events: %s, lumis: %s, and files: %s",
            reason, jobEvents, compactLumis, lfns)
        if not failedJob:
            self.newJob()
        else:
            logging.debug(" This job will be made failed")
            self.newJob(failedJob=failedJob, failedReason=reason)

        # Resource estimates scale with the number of events in the job
        timePerEvent, sizePerEvent, memoryRequirement = self.getPerformanceParameters(
            self.perfParameters)
        self.currentJob.addResourceEstimates(jobTime=jobEvents * timePerEvent,
                                             disk=jobEvents * sizePerEvent,
                                             memory=memoryRequirement)

        # Job mask: one entry per lumi range of every run
        for run, lumiRanges in compactLumis.iteritems():
            for lumiRange in lumiRanges:
                self.currentJob['mask'].addRunAndLumis(run=int(run),
                                                       lumis=lumiRange)

        # Input files
        for jobFile in jobFiles:
            self.currentJob.addFile(jobFile)

        # Pileup information, only when deterministic pileup is enabled
        if self.deterministicPU:
            eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
            logging.debug('Adding baggage to skip %s events', eventsToSkip)
            self.currentJob.addBaggageParameter("skipPileupEvents",
                                                eventsToSkip)
Пример #39
0
    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis

        Entries of ``files`` that are strings, or that have no "LumiList" key,
        cannot be checked and are always returned.

        :param files: iterable of file entries; an entry with a "LumiList" key
            is read as a list of dicts holding "RunNumber" and
            "LumiSectionNumber" (an iterable of lumis)
        :return: list with the entries of ``files`` that pass the selection
        """
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        lumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if lumiMask:  # We have a lumiMask, so use it and modify with run white/black list
            # NOTE(review): selectRuns/removeRuns are called on the object
            # returned by getLumiMask(); if that is not a private copy the
            # task's own mask is narrowed as well - confirm against the task.
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:  # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs=runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                blackMask = LumiList(runs=runBlackList)

        results = []
        for f in files:
            if isinstance(f, basestring) or "LumiList" not in f:
                results.append(f)
                continue

            # Create a LumiList from the WMBS info.  Lumis are accumulated in
            # lists owned by this method: keeping a reference to the file's own
            # list and extending it would silently grow the caller's data
            # whenever a run shows up in more than one entry.
            runLumis = {}
            for x in f['LumiList']:
                runLumis.setdefault(x['RunNumber'], []).extend(x['LumiSectionNumber'])
            fileLumiList = LumiList(runsAndLumis=runLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif blackMask:
                if fileLumiList - blackMask:  # At least one lumi from file is not in blackMask
                    results.append(f)
            else:  # There is effectively no mask
                results.append(f)

        return results
Пример #40
0
    def adjust(self, parameters, inputs, outputs, se):
        """
        Add this object's file and mask information to a task description.

        :param parameters: dict with a 'mask' dict; 'mask'/'files',
            'output files' and, unless file based, 'mask'/'lumis' are set
        :param inputs: list extended in place with
            (se.local(path), basename, False) for every non-empty input file,
            when running locally and the storage transfers inputs
        :param outputs: list extended in place with
            (se.local(remote), basename of local) when the storage transfers
            outputs
        :param se: storage object providing transfer_inputs(),
            transfer_outputs() and local()
        """
        if self._local and se.transfer_inputs():
            inputs += [(se.local(path), os.path.basename(path), False)
                       for _, path in self._files if path]
        if se.transfer_outputs():
            outputs += [(se.local(remote), os.path.basename(local))
                        for local, remote in self.outputs]

        mask = parameters['mask']
        mask['files'] = self.input_files
        parameters['output files'] = self.outputs
        if self._file_based:
            return

        # Only the (run, lumi) part of every unit goes into the mask
        runLumiPairs = set()
        for _, _, run, lumi in self._units:
            runLumiPairs.add((run, lumi))
        mask['lumis'] = LumiList(lumis=runLumiPairs).getCompactList()
Пример #41
0
def getLumiListInValidFiles(dataset, dbsurl = 'phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    Raises ClientException when the DBS request fails or when DBS returns no
    files for the dataset.
    """
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        # All files are requested (validFileOnly=0); validity is filtered below
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg  = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)

    # A set keeps the membership test in the inner loop constant-time; with a
    # list every lumi record would scan all valid file names.
    validFiles = set(f['logical_file_name'] for f in files if f['is_file_valid'])
    blocks = set(f['block_name'] for f in files)

    runLumiPairs = []
    for blockName in blocks:
        for fileLumis in dbs3api.listFileLumis(block_name=blockName):
            if fileLumis['logical_file_name'] not in validFiles:
                continue
            run = fileLumis['run_num']
            runLumiPairs.extend((run, lumi) for lumi in fileLumis['lumi_section_num'])

    return LumiList(lumis=runLumiPairs)
Пример #42
0
 def getDuplicateLumis(lumisDict):
     """
     Return, in compact-list form, the run-lumis that are listed more
     than once in the input dictionary of runs and lumis. The input is
     assumed to look like:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     duplicates = set()
     for run, lumis in lumisDict.iteritems():
         # Lumis already met for this run; a second sighting is a duplicate
         seenLumis = set()
         for lumi in lumis:
             if lumi in seenLumis:
                 duplicates.add((run, lumi))
             else:
                 seenLumis.add(lumi)
     return LumiList(lumis=duplicates).getCompactList()
Пример #43
0
 def getDuplicateLumis(lumisDict):
     """
     Find the (run, lumi) pairs that occur more than once in a
     dictionary of runs and lumis and return them as a compact list.
     Expected input format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     repeated = set()
     for run, lumis in lumisDict.iteritems():
         firstSeen = set()
         for pair in ((run, lumi) for lumi in lumis):
             # Any pair met again after its first sighting is a repeat
             if pair in firstSeen:
                 repeated.add(pair)
             firstSeen.add(pair)
     compact = LumiList(lumis=repeated).getCompactList()
     return compact
Пример #44
0
    def notestRead(self):
        """
        Test reading a LumiList from a JSON file

        The method name does not start with "test", so the default unittest
        loader does not collect it.
        """
        # Expected lumi ranges, in the order all three representations use
        ranges = ["1:1-1:33", "1:35", "1:37-1:47", "2:49-2:75", "2:77-2:130", "2:133-2:136"]
        expectedString = ",".join(ranges)
        expectedCompact = {
            "1": [[1, 33], [35, 35], [37, 47]],
            "2": [[49, 75], [77, 130], [133, 136]],
        }
        expectedVLBR = cms.VLuminosityBlockRange(*ranges)

        fromFile = LumiList(filename="lumiTest.json")
        actualString = fromFile.getCMSSWString()
        actualCompact = fromFile.getCompactList()
        actualVLBR = fromFile.getVLuminosityBlockRange(True)

        self.assertTrue(actualString == expectedString)
        self.assertTrue(actualCompact == expectedCompact)
        self.assertTrue(actualVLBR == expectedVLBR)
Пример #45
0
def getLumiList(lumi_mask_name, logger = None):
    """
    Build a LumiList object from a lumi-mask location.
    lumi_mask_name: an http(s) address; anything else is treated as a
                    json file on disk.
    logger: optional logger; when given, a debug message says which of
            the two sources is used.
    """
    scheme = urlparse.urlparse(lumi_mask_name)[0]

    if scheme in ['http', 'https']:
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        return LumiList(url = lumi_mask_name)

    if logger:
        logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
    return LumiList(filename = lumi_mask_name)
Пример #46
0
    def notestRead(self):
        """
        Read lumiTest.json and compare every representation LumiList offers
        with the expected one (the test is disabled by its "notest" name
        prefix as far as the default unittest loader is concerned).
        """
        wantString = "1:1-1:33,1:35,1:37-1:47,2:49-2:75,2:77-2:130,2:133-2:136"
        wantCompact = {
            '1': [[1, 33], [35, 35], [37, 47]],
            '2': [[49, 75], [77, 130], [133, 136]],
        }
        wantVLBR = cms.VLuminosityBlockRange(
            '1:1-1:33', '1:35', '1:37-1:47',
            '2:49-2:75', '2:77-2:130', '2:133-2:136')

        lumisFromJson = LumiList(filename = 'lumiTest.json')
        gotString = lumisFromJson.getCMSSWString()
        gotCompact = lumisFromJson.getCompactList()
        gotVLBR = lumisFromJson.getVLuminosityBlockRange(True)

        self.assertTrue(gotString == wantString)
        self.assertTrue(gotCompact == wantCompact)
        self.assertTrue(gotVLBR == wantVLBR)
Пример #47
0
    def testSubtract(self):
        """
        Check a - b for several combinations of lumi lists
        """
        a = LumiList(runsAndLumis={
            '1': range(2, 20) + range(31, 39) + range(45, 49),
            '2': range(6, 20) + range(30, 40),
            '3': range(10, 20) + range(30, 40) + range(50, 60),
        })
        b = LumiList(runsAndLumis={
            '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
            '2': range(10, 35),
            '3': range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
        })
        # Same as b for runs 1 and 2, but without run 3
        c = LumiList(runsAndLumis={
            '1': range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(33, 36),
            '2': range(10, 35),
        })
        # Expected a - b
        expectedAB = LumiList(runsAndLumis={
            '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
            '2': range(6, 10) + range(35, 40),
            '3': range(15, 20) + range(30, 35) + range(51, 59),
        })
        # Expected a - c: run 3 of a is left untouched
        expectedAC = LumiList(runsAndLumis={
            '1': range(6, 12) + range(13, 16) + range(31, 33) + range(36, 39),
            '2': range(6, 10) + range(35, 40),
            '3': range(10, 20) + range(30, 40) + range(50, 60),
        })

        aMinusB = (a - b).getCMSSWString()
        self.assertTrue(aMinusB == expectedAB.getCMSSWString())
        # Subtraction is not symmetric
        self.assertTrue(aMinusB != (b - a).getCMSSWString())

        # Test where c is missing runs from a
        aMinusC = (a - c).getCMSSWString()
        self.assertTrue(aMinusC == expectedAC.getCMSSWString())
        self.assertTrue(aMinusC != (c - a).getCMSSWString())

        # Test empty lists
        nothing = a - a
        self.assertTrue(str(nothing) == '{}')
        self.assertTrue(len(nothing) == 0)
Пример #48
0
    def mergeLumis(inputdata, lumimask):
        """
        Merge the lumis reported by the single files and compare them with the
        lumi mask (called when usedbs=no).

        Returns three compact lists: the merged (processed) lumis, the lumis of
        ``lumimask`` that are not among them, and the lumis reported more than
        once.
        """
        seenPairs = set()
        repeatedPairs = set()

        # Collect every (run, lumi) pair of every report, remembering repeats
        for reports in inputdata.values():
            for report in reports:
                runLumi = literal_eval(report['runlumi'])
                for run, lumis in runLumi.iteritems():
                    for lumi in lumis:
                        pair = (run, lumi)
                        if pair in seenPairs:
                            repeatedPairs.add(pair)
                        else:
                            seenPairs.add(pair)

        def groupByRun(pairs):
            """
            Turn pairs into a dict, lumis as int:
            [(123,3), (123,4), (234,6)] => {123 : [3,4], 234 : [6]}
            """
            grouped = {}
            for run, lumi in pairs:
                grouped.setdefault(run, []).append(int(lumi))
            return grouped

        doubleLumis = LumiList(runsAndLumis=groupByRun(repeatedPairs))
        mergedLumis = LumiList(runsAndLumis=groupByRun(seenPairs))

        # Compact lists: processed, in the mask but not processed, duplicated
        mergedCompact = mergedLumis.getCompactList()
        notProcessedCompact = (LumiList(compactList=lumimask) - mergedLumis).getCompactList()
        doubleCompact = doubleLumis.getCompactList()
        return mergedCompact, notProcessedCompact, doubleCompact
Пример #49
0
 def getLumilist(self):
     """
     Return the 'LumiList' parameter wrapped in a LumiList object.
     When the parameter is empty, an empty dict is returned instead.
     """
     compactLumis = self._getValue('LumiList', {})
     return LumiList(compactList=compactLumis) if compactLumis else {}
Пример #50
0
    def makeNewJobByWork(self, reason='', failedJob=False):
        """
        Create a new job out of the events, lumis and files collected so far.

        :param reason: Why are we making a new job (logged; also recorded as
                       the failure reason when the job is made failed)
        :param failedJob: Make the job as already failed

        :return: nothing
        """
        eventCount = self.eventsInJob
        lumisOfJob = self.jobLumis
        filesOfJob = self.jobFiles

        self.maxLumis = max(self.maxLumis, len(lumisOfJob))

        # Compact and usable form of the lumis (run -> lumi ranges below)
        lumisByRun = LumiList(lumis=lumisOfJob).getCompactList()
        logging.debug("Because %s new job with events: %s, lumis: %s, and files: %s",
                      reason, eventCount, lumisByRun, [f['lfn'] for f in filesOfJob])

        # A failed job carries the failure flag and reason; a normal one nothing
        newJobArgs = {}
        if failedJob:
            logging.debug(" This job will be made failed")
            newJobArgs = {'failedJob': failedJob, 'failedReason': reason}
        self.newJob(**newJobArgs)

        # Performance information, scaled by the number of events
        timePerEvent, sizePerEvent, memoryRequirement = self.getPerformanceParameters(self.perfParameters)
        self.currentJob.addResourceEstimates(jobTime=eventCount * timePerEvent,
                                             disk=eventCount * sizePerEvent,
                                             memory=memoryRequirement)

        # Job mask information
        for run, lumiRanges in lumisByRun.iteritems():
            runNumber = int(run)
            for lumiRange in lumiRanges:
                self.currentJob['mask'].addRunAndLumis(run=runNumber, lumis=lumiRange)

        # Files
        for oneFile in filesOfJob:
            self.currentJob.addFile(oneFile)

        # Pileup info, only if needed
        if not self.deterministicPU:
            return
        eventsToSkip = (self.nJobs - 1) * self.maxEvents * self.maxLumis
        logging.debug('Adding baggage to skip %s events', eventsToSkip)
        self.currentJob.addBaggageParameter("skipPileupEvents", eventsToSkip)

        return
Пример #51
0
    def getLumilistWhitelist(self, collectionID, taskName):
        """
        Wrap the result of getLumiWhitelist in a LumiList object.

        Args:
            collectionID, taskName: forwarded unchanged to getLumiWhitelist

        Returns: a LumiList built with that result as its compact list
        """
        compactWhitelist = self.getLumiWhitelist(collectionID, taskName)
        return LumiList(compactList=compactWhitelist)
Пример #52
0
    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Round trip of a typical lumi mask: store it on a task in compact form,
        read it back and compare the CMSSW strings of both.
        """
        task = makeWMTask("TestTask")

        compactMask = {
            '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
            '2': [[1, 45]],
            '3': [[1, 45], [50, 80]],
        }
        expected = LumiList(compactList=compactMask)

        task.setLumiMask(lumiMask=expected.getCompactList())
        # getLumiMask is used here as returning an object with getCMSSWString()
        readBack = task.getLumiMask()
        self.assertEqual(expected.getCMSSWString(), readBack.getCMSSWString())

        return
Пример #53
0
    def subtractLumis(input, output):
        """
        Computes the processed lumis from the DBS results (called when usedbs=yes).

        input: dict of run -> list of lumis to be processed
        output: dict of run -> list of lumis found in the output; a lumi listed
                more than once for the same run counts as duplicated

        Returns three compact lists: the lumis of ``output``, the lumis of
        ``input`` that are not in ``output``, and the lumis listed more than
        once in ``output``.
        """
        out = LumiList(runsAndLumis=output)
        in_ = LumiList(runsAndLumis=input)
        diff = in_ - out

        # Calculate lumis counted twice.  One pass per run with a "seen" set;
        # calling list.count() for every lumi would rescan the whole list each
        # time and make this quadratic in the number of lumis of a run.
        dLumisDict = {}
        for run, lumis in output.iteritems():
            seen = set()
            repeated = set()
            for lumi in lumis:
                if lumi in seen:
                    repeated.add(lumi)
                else:
                    seen.add(lumi)
            if repeated:
                dLumisDict[run] = list(repeated)
        double = LumiList(runsAndLumis=dLumisDict)

        return out.getCompactList(), diff.getCompactList(), double.getCompactList()
Пример #54
0
    def testAddLumiMask(self):
        """
        _testAddLumiMask_

        Store a typical lumi mask on a task in compact form, build a LumiList
        from what the task hands back and check that the round trip kept it.
        """
        task = makeWMTask("TestTask")

        reference = LumiList(compactList={
            '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
            '2': [[1, 45]],
            '3': [[1, 45], [50, 80]],
        })
        task.setLumiMask(lumiMask=reference.getCompactList())

        # Here the value returned by the task is fed to LumiList as a compact list
        roundTripped = LumiList(compactList=task.getLumiMask())

        self.assertEqual(reference.getCMSSWString(), roundTripped.getCMSSWString())

        return
Пример #55
0
    def testOr(self):
        """
        a|b for lots of cases, including an operand that lacks a run
        """

        alumis = {
            "1": range(2, 20) + range(31, 39) + range(45, 49),
            "2": range(6, 20) + range(30, 40),
            "3": range(10, 20) + range(30, 40) + range(50, 60),
        }
        blumis = {
            "1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
            "2": range(10, 35),
            "3": range(10, 15) + range(35, 40) + range(45, 51) + range(59, 70),
        }
        # Same as blumis for runs 1 and 2, but without run 3
        clumis = {
            "1": range(1, 6) + range(12, 13) + range(16, 30) + range(40, 50) + range(39, 80),
            "2": range(10, 35),
        }
        # Expected a|b: per run, the lumis of a followed by the lumis of b
        result = {
            "1": range(2, 20)
            + range(31, 39)
            + range(45, 49)
            + range(1, 6)
            + range(12, 13)
            + range(16, 30)
            + range(40, 50)
            + range(39, 80),
            "2": range(6, 20) + range(30, 40) + range(10, 35),
            "3": range(10, 20)
            + range(30, 40)
            + range(50, 60)
            + range(10, 15)
            + range(35, 40)
            + range(45, 51)
            + range(59, 70),
        }
        # Expected a|c: runs 1 and 2 as in a|b, run 3 comes from a alone
        result2 = {
            "1": list(result["1"]),
            "2": list(result["2"]),
            "3": list(alumis["3"]),
        }
        a = LumiList(runsAndLumis=alumis)
        b = LumiList(runsAndLumis=blumis)
        # Built from clumis: building it from blumis (as before) left both
        # clumis and c unused and the missing-run case untested
        c = LumiList(runsAndLumis=clumis)
        r = LumiList(runsAndLumis=result)
        r2 = LumiList(runsAndLumis=result2)
        self.assertTrue((a | b).getCMSSWString() == r.getCMSSWString())
        self.assertTrue((a | b).getCMSSWString() == (b | a).getCMSSWString())
        self.assertTrue((a | b).getCMSSWString() == (a + b).getCMSSWString())
        # Test where c is missing runs from a
        self.assertTrue((a | c).getCMSSWString() == r2.getCMSSWString())
        self.assertTrue((a | c).getCMSSWString() == (c | a).getCMSSWString())
Пример #56
0
def edit_process_source(pset, config):
    """Append the task-specific configuration fragment to a parameter set.

    The fragment sets the input files (or the gridpack) and the lumi mask,
    and optionally adds a process summary, runtime limit, seeding, MC
    production settings and the number of cores. It is logged line by line
    and then appended to the file ``pset``.
    """
    mask = config['mask']
    files = mask['files']
    lumis = LumiList(compactList=mask['lumis']).getVLuminosityBlockRange()
    want_summary = config['want summary']
    runtime = config.get('task runtime')
    cores = config.get('cores')

    # MC production settings
    run_first = mask.get('first run')
    lumi_first = mask.get('first lumi')
    lumi_events = mask.get('events per lumi')
    seeding = config.get('randomize seeds', False)

    with open(pset, 'a') as fp:
        # Pieces are collected in order and joined once at the end
        pieces = [fragment.format(events=mask['events'])]
        if any(files) and not config['gridpack']:
            file_names = repr([str(f) for f in files])
            pieces.append(
                "\nprocess.source.fileNames = cms.untracked.vstring({0})".format(file_names))
        if config['gridpack']:
            # ExternalLHEProducer only understands local files and does
            # not expect the `file:` prefix. Also, there can never be
            # more than one gridpack, so take the first element.
            gridpack_path = os.path.abspath(files[0].replace('file:', ''))
            pieces.append(fragment_gridpack.format(gridpack=gridpack_path))
        if lumis:
            lumi_ranges = [str(l) for l in lumis]
            pieces.append(
                "\nprocess.source.lumisToProcess = cms.untracked.VLuminosityBlockRange({0})".format(lumi_ranges))
        if want_summary:
            pieces.append(fragment_sum)
        if runtime:
            pieces.append(fragment_runtime.format(time=runtime))
        if seeding:
            pieces.append(fragment_seeding)
        if lumi_events:
            pieces.append(fragment_lumi.format(events=lumi_events))
        if lumi_first:
            pieces.append(fragment_first_lumi.format(lumi=lumi_first))
        if run_first:
            pieces.append(fragment_first_run.format(run=run_first))
        if cores:
            pieces.append(fragment_cores.format(cores=cores))
        frag = "".join(pieces)

        logger.info("config file fragment")
        with mangler.output('pset'):
            for line in frag.splitlines():
                logger.debug(line)
        fp.write(frag)
Пример #57
0
    def validFiles(self, files):
        """
        Apply lumi mask and or run white/black list and return files which have
        one or more of the requested lumis

        Entries of ``files`` that are strings, or that have no "LumiList" key,
        cannot be checked and are always returned.

        :param files: iterable of file entries; an entry with a "LumiList" key
            is read as a list of dicts holding "RunNumber" and
            "LumiSectionNumber" (an iterable of lumis)
        :return: list with the entries of ``files`` that pass the selection
        """
        runWhiteList = self.topLevelTask.inputRunWhitelist()
        runBlackList = self.topLevelTask.inputRunBlacklist()
        taskLumiMask = self.topLevelTask.getLumiMask()

        blackMask = None
        if taskLumiMask:       # We have a lumiMask, so use it and modify with run white/black list
            if isinstance(taskLumiMask, LumiList):  # For a possible future where we use LumiList more prevalently
                # Work on a copy so the task's own mask is not narrowed below
                lumiMask = copy.deepcopy(taskLumiMask)
            else:
                lumiMask = LumiList(compactList = taskLumiMask)
            if runWhiteList:
                lumiMask.selectRuns(runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        elif runWhiteList:    # We have a run whitelist, subtract off blacklist
            lumiMask = LumiList(runs = runWhiteList)
            if runBlackList:
                lumiMask.removeRuns(runBlackList)
        else:
            lumiMask = None
            if runBlackList:  # We only have a blacklist, so make a black mask out of it instead
                # The black mask must hold the blacklisted runs; the whitelist
                # is known to be empty on this branch.
                blackMask = LumiList(runs = runBlackList)

        results = []
        for f in files:
            if isinstance(f, str) or "LumiList" not in f:
                results.append(f)
                continue

            # Create a LumiList from the WMBS info.  Lumis of a run that shows
            # up in several entries are merged (not overwritten), into lists
            # owned by this method so the file's own data is never modified.
            fileRunsAndLumis = {}
            for x in f['LumiList']:
                fileRunsAndLumis.setdefault(str(x['RunNumber']), []).extend(x['LumiSectionNumber'])
            fileLumiList = LumiList(runsAndLumis = fileRunsAndLumis)

            if lumiMask:
                if fileLumiList & lumiMask:  # At least one lumi from file is in lumiMask
                    results.append(f)
            elif blackMask:
                if fileLumiList - blackMask: # At least one lumi from file is not in blackMask
                    results.append(f)
            else:                            # There is effectively no mask
                results.append(f)

        return results
Пример #58
0
    def adjustLumisForCompletion(self, task, unprocessed):
        """Sets the run, lumi information in the task information for the
        completion jobs.  Returns True if completion jobs are needed,
        otherwise False.

        :param task: dict-like task; when completion jobs are needed its
            'tm_split_args' entry gets 'runs' and 'lumis' set
        :param unprocessed: set of job identifiers to consider; it is
            intersected with the file names found in the missing-lumis
            directory and with self.failedJobs
        """
        missingDir = "automatic_splitting/missing_lumis/"  #TODO in ServerUtilities to be shared with PJ

        try:
            available = set(os.listdir(missingDir)) & unprocessed
        except OSError:
            # Directory not readable: nothing is available from it
            available = set()

        failed = set(self.failedJobs) & unprocessed

        if not available and not failed:
            return False

        missing = LumiList()
        for missingFile in available:
            with open(os.path.join(missingDir, missingFile)) as fd:
                self.logger.info("Adding missing lumis from job %s",
                                 missingFile)
                missing = missing + LumiList(
                    compactList=literal_eval(fd.read()))
        for failedId in failed:
            # The scratch directory is created before entering the try block:
            # if mkdtemp() itself fails, the cleanup below must not run with
            # `tmpdir` unbound (that NameError would hide the real error).
            tmpdir = tempfile.mkdtemp()
            f = None
            try:
                f = tarfile.open("run_and_lumis.tar.gz")
                fn = "job_lumis_{0}.json".format(failedId)
                f.extract(fn, path=tmpdir)
                with open(os.path.join(tmpdir, fn)) as fd:
                    injson = json.load(fd)
                    missing = missing + LumiList(compactList=injson)
                    self.logger.info("Adding lumis from failed job %s",
                                     failedId)
            finally:
                if f:
                    f.close()
                shutil.rmtree(tmpdir)
        missing_compact = missing.getCompactList()
        runs = missing.getRuns()
        # Compact list is like
        # {
        # '1': [[1, 33], [35, 35], [37, 47], [49, 75], [77, 130], [133, 136]],
        # '2':[[1,45],[50,80]]
        # }
        # Now we turn lumis it into something like:
        # lumis=['1, 33, 35, 35, 37, 47, 49, 75, 77, 130, 133, 136','1,45,50,80']
        # which is the format expected by buildLumiMask in the splitting algorithm
        lumis = [
            ",".join(
                str(l) for l in functools.reduce(
                    lambda x, y: x + y, missing_compact[run])) for run in runs
        ]

        task['tm_split_args']['runs'] = runs
        task['tm_split_args']['lumis'] = lumis

        return True
Пример #59
0
    def testFilter(self):
        """
        Test filtering of a list of lumis

        A LumiList is built from runsAndLumis and its filterLumis() method is
        applied to three lists of (run, lumi) pairs: one containing all of its
        lumis, one containing only lumis it has, and one that partly overlaps.
        """
        runsAndLumis = {
            1: range(1, 34) + [35] + range(37, 48),
            2: range(49, 76) + range(77, 131) + range(133, 137)
        }

        # zip() stops at the shorter argument, so the [run]*N lists only need
        # to be at least as long as the lumi ranges they are paired with.
        # Runs 1-3, lumis 1-149 each
        completeList = list(zip([1]*150, range(1, 150))) + \
                       list(zip([2]*150, range(1, 150))) + \
                       list(zip([3]*150, range(1, 150)))

        # Only pairs that are present in runsAndLumis
        smallList    = list(zip([1]*50,  range(1, 10))) + list(zip([2]*50, range(50, 70)))
        # Pairs partly inside and partly outside runsAndLumis
        overlapList  = list(zip([1]*150, range(30, 40))) + \
                       list(zip([2]*150, range(60, 80)))
        # The pairs of overlapList that are also in runsAndLumis
        overlapRes   = list(zip([1]*9,   range(30, 34))) + [(1, 35)] + \
                       list(zip([1]*9,   range(37, 40))) + \
                       list(zip([2]*30,  range(60, 76))) + \
                       list(zip([2]*9,   range(77, 80)))

        runLister = LumiList(runsAndLumis = runsAndLumis)

        # Test a list to be filtered which is a superset of constructed list
        filterComplete = runLister.filterLumis(completeList)
        # Test a list to be filtered which is a subset of constructed list
        filterSmall    = runLister.filterLumis(smallList)
        # Test a list to be filtered which is neither
        filterOverlap  = runLister.filterLumis(overlapList)

        # Superset in: exactly the constructed lumis come back
        self.assertTrue(filterComplete == runLister.getLumis())
        # Subset in: everything passes
        self.assertTrue(filterSmall    == smallList)
        # Partial overlap in: only the common pairs pass
        self.assertTrue(filterOverlap  == overlapRes)