Пример #1
0
    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            parentList = {}
            parentFlag = False
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                parentList[block["Name"]] = block['Sites']

            self.newQueueElement(
                Inputs={block['Name']: block['Sites']},
                ParentFlag=parentFlag,
                ParentData=parentList,
                NumberOfLumis=block[self.lumiType],
                NumberOfFiles=block['NumberOfFiles'],
                NumberOfEvents=block['NumberOfEvents'],
                Jobs=ceil(
                    float(block[self.args['SliceType']]) /
                    float(self.args['SliceSize'])),
                ACDC=block['ACDC'],
                NoInputUpdate=self.initialTask.getTrustSitelists().get(
                    'trustlists'),
                NoPileupUpdate=self.initialTask.getTrustSitelists().get(
                    'trustPUlists'))
Пример #2
0
    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            parentList = {}
            parentFlag = False
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                parentList[block["Name"]] = block['Sites']

            self.newQueueElement(Inputs={block['Name']: block['Sites']},
                                 ParentFlag=parentFlag,
                                 ParentData=parentList,
                                 NumberOfLumis=block[self.lumiType],
                                 NumberOfFiles=block['NumberOfFiles'],
                                 NumberOfEvents=block['NumberOfEvents'],
                                 Jobs=ceil(float(block[self.args['SliceType']]) /
                                           float(self.args['SliceSize'])),
                                 ACDC=block['ACDC'],
                                 NoInputUpdate=self.initialTask.getTrustSitelists().get('trustlists'),
                                 NoPileupUpdate=self.initialTask.getTrustSitelists().get('trustPUlists')
                                )
Пример #3
0
    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
            else:
                parentFlag = False
            self.newQueueElement(Inputs = {block['Name'] : block['Sites']},
                                 ParentFlag = parentFlag,
                                 NumberOfLumis = block[self.lumiType],
                                 NumberOfFiles = block['NumberOfFiles'],
                                 NumberOfEvents = block['NumberOfEvents'],
                                 Jobs = ceil(float(block[self.args['SliceType']]) /
                                             float(self.args['SliceSize'])),
                                 ACDC = block['ACDC'],
                                 )
Пример #4
0
    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
            else:
                parentFlag = False
            self.newQueueElement(Inputs = {block['Name'] : block['Sites']},
                                 ParentFlag = parentFlag,
                                 NumberOfLumis = block[self.lumiType],
                                 NumberOfFiles = block['NumberOfFiles'],
                                 NumberOfEvents = block['NumberOfEvents'],
                                 Jobs = ceil(float(block[self.args['SliceType']]) /
                                             float(self.args['SliceSize'])),
                                 ACDC = block['ACDC'],
                                 )
Пример #5
0
    def __call__(self, wmTask):
        """
        Method is called  when WorkQueue creates the sandbox for a job.
        Need to look at the pileup configuration in the spec and query dbs to
        determine the lfns for the files in the datasets and what sites they're
        located at (WQ creates the job sandbox).

        wmTask is instance of WMTask.WMTaskHelper

        """
        fakeSites = []

        # check whether we need to overlook the PU data location
        if wmTask.getTrustSitelists().get('trustPUlists'):
            fakeSites = makeLocationsList(wmTask.siteWhitelist(), wmTask.siteBlacklist())

        for step in wmTask.steps().nodeIterator():
            helper = WMStep.WMStepHelper(step)
            # returns e.g. instance of CMSSWHelper
            # doesn't seem to be necessary ... strangely (some inheritance involved?)
            # typeHelper = helper.getTypeHelper()
            if hasattr(helper.data, "pileup"):
                self._createPileupConfigFile(helper, fakeSites)
Пример #6
0
    def __call__(self, wmTask):
        """
        Method is called  when WorkQueue creates the sandbox for a job.
        Need to look at the pileup configuration in the spec and query dbs to
        determine the lfns for the files in the datasets and what sites they're
        located at (WQ creates the job sandbox).

        wmTask is instance of WMTask.WMTaskHelper

        """
        fakeSites = []

        # check whether we need to pretend PU data location
        if wmTask.getTrustSitelists():
            fakeSites = makeLocationsList(wmTask.siteWhitelist(),
                                          wmTask.siteBlacklist())

        for step in wmTask.steps().nodeIterator():
            helper = WMStep.WMStepHelper(step)
            # returns e.g. instance of CMSSWHelper
            # doesn't seem to be necessary ... strangely (some inheritance involved?)
            # typeHelper = helper.getTypeHelper()
            if hasattr(helper.data, "pileup"):
                self._createPileupConfigFile(helper, fakeSites)
Пример #7
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        lumiMask = task.getLumiMask()
        if lumiMask:
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

        for blockName in dbs.listFileBlocks(datasetPath):
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue

            blockSummary = dbs.getDBSSummaryInfo(block=blockName)
            if int(blockSummary.get('NumberOfFiles', 0)) == 0:
                logging.warning("Block %s being rejected for lack of valid files to process", blockName)
                self.badWork.append(blockName)
                continue

            if self.args['SliceType'] == 'NumberOfRuns':
                blockSummary['NumberOfRuns'] = dbs.listRuns(block=blockName)

            # check lumi restrictions
            if lumiMask:
                if blockName not in maskedBlocks:
                    logging.warning("Block %s doesn't pass the lumi mask constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                acceptedLumiCount = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                ratioAccepted = 1. * acceptedLumiCount / float(blockSummary['NumberOfLumis'])
                maskedRuns = [maskedBlocks[blockName][lfn].getRuns() for lfn in maskedBlocks[blockName]]
                acceptedRuns = set(lumiMask.getRuns()).intersection(set().union(*maskedRuns))

                blockSummary['NumberOfFiles'] = len(maskedBlocks[blockName])
                blockSummary['NumberOfEvents'] = float(blockSummary['NumberOfEvents']) * ratioAccepted
                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfRuns'] = acceptedRuns
            # check run restrictions
            elif runWhiteList or runBlackList:
                runs = set(dbs.listRuns(block=blockName))
                # multi run blocks need special account, requires more DBS calls
                recalculateLumiCounts = True if len(runs) > 1 else False

                # apply blacklist and whitelist
                runs = runs.difference(runBlackList)
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    logging.warning("Block %s doesn't pass the runs constraints", blockName)
                    self.rejectedWork.append(blockName)
                    continue

                if recalculateLumiCounts:
                    # Recalculate the number of files, lumis and ~events accepted
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=blockName)

                    for fileEntry in fileInfo:
                        acceptedFile = False
                        for lumiInfo in fileEntry['LumiList']:
                            if lumiInfo['RunNumber'] in runs:
                                acceptedFile = True
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedEventCount += fileEntry['NumberOfEvents']

                else:
                    acceptedLumiCount = blockSummary["NumberOfLumis"]
                    acceptedFileCount = blockSummary['NumberOfFiles']
                    acceptedEventCount = blockSummary['NumberOfEvents']

                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfFiles'] = acceptedFileCount
                blockSummary['NumberOfEvents'] = acceptedEventCount
                blockSummary['NumberOfRuns'] = runs

            validBlocks.append(blockSummary)

            if locations is None:
                locations = set(dbs.listFileBlockLocation(blockName))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(blockName))

        # all needed blocks present at these sites
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(set(self.cric.PNNstoPSNs(locations)))

        return validBlocks
Пример #8
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask():  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {datasetPath: []}  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before or blocks that were included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([len(maskedBlocks[blockName][lfn].getLumis()) for lfn in maskedBlocks[blockName]])
                # use the information given from getMaskedBlocks to compute che size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio =  lumis which are ok in the block / total num lumis
                ratioAccepted = accepted_lumis / block['NumberOfLumis']
                block['NumberOfEvents'] = block['NumberOfEvents'] * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                acceptedEventCount += acceptedFileLumiCount * fileEntry['NumberOfEvents'] / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when location is no find.
            # There could be case for network problem (no connection to dbs, phedex)
            # or DBS se is not recorded (This will be retried anyway by location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #    self.rejectedWork.append(blockName)
            #    continue

            validBlocks.append(block)
        return validBlocks
Пример #9
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask(
        ):  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data inputs or from spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {
                    datasetPath: []
                }  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before or blocks that were included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([
                    len(maskedBlocks[blockName][lfn].getLumis())
                    for lfn in maskedBlocks[blockName]
                ])
                # use the information given from getMaskedBlocks to compute che size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio =  lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / float(
                    block['NumberOfLumis'])
                block['NumberOfEvents'] = float(
                    block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(
                        fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(
                                    lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']
                                   ) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] \
                                                      / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry[
                                    'NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(
                    dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when location is no find.
            # There could be case for network problem (no connection to dbs, phedex)
            # or DBS se is not recorded (This will be retried anyway by location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #    self.rejectedWork.append(blockName)
            #    continue

            validBlocks.append(block)
        return validBlocks
Пример #10
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getTrustSitelists():
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if recalculateLumiCounts:
                    # get correct lumi count
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(
                        fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']
                                   ) != acceptedFileLumiCount:
                                acceptedEventCount += float(
                                    acceptedFileLumiCount
                                ) * fileEntry['NumberOfEvents'] / len(
                                    fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry[
                                    'NumberOfEvents']
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block['NumberOfFiles']
                    acceptedEventCount = block['NumberOfEvents']

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)

                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(dbs.listFileBlockLocation(block['block']))
            else:
                locations = locations.intersection(
                    dbs.listFileBlockLocation(block['block']))

        # all needed blocks present at these sites
        if self.wmspec.getTrustLocationFlag():
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(
                set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
Пример #11
0
    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getTrustSitelists().get("trustlists"):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block["block"] not in blockWhiteList:
                continue
            if block["block"] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block["block"])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if recalculateLumiCounts:
                    # get correct lumi count
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block["block"])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry["LumiList"]:
                            runNumber = lumiInfo["RunNumber"]
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry["LumiList"]) != acceptedFileLumiCount:
                                acceptedEventCount += (
                                    float(acceptedFileLumiCount)
                                    * fileEntry["NumberOfEvents"]
                                    / len(fileEntry["LumiList"])
                                )
                            else:
                                acceptedEventCount += fileEntry["NumberOfEvents"]
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block["NumberOfFiles"]
                    acceptedEventCount = block["NumberOfEvents"]

                # recalculate effective size of block
                # make a guess for new event/file numbers from ratio
                # of accepted lumi sections (otherwise have to pull file info)

                block[self.lumiType] = acceptedLumiCount
                block["NumberOfFiles"] = acceptedFileCount
                block["NumberOfEvents"] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(dbs.listFileBlockLocation(block["block"]))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(block["block"]))

        # all needed blocks present at these sites
        if self.wmspec.getTrustLocationFlag().get("trustlists"):
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks