Example #1
    def report(self, workflow, userdn, userproxy=None):
        res = {}
        self.logger.info(
            "About to get output of workflow: %s. Getting status first." %
            workflow)
        statusRes = self.status(workflow, userdn, userproxy)[0]

        #load the lumimask
        rows = self.api.query(None, None, ID.sql, taskname=workflow)
        splitArgs = literal_eval(rows.next()[6].read())
        res['lumiMask'] = buildLumiMask(splitArgs['runs'], splitArgs['lumis'])

        #extract the finished jobs from filemetadata
        jobids = [x[1] for x in statusRes['jobList'] if x[0] in ['finished']]
        rows = self.api.query(None, None, GetFromPandaIds.sql, types='EDM', taskname=workflow, jobids=','.join(map(str,jobids)),\
                                        limit=len(jobids)*100)
        res['runsAndLumis'] = {}
        for row in rows:
            res['runsAndLumis'][str(row[GetFromPandaIds.PANDAID])] = {
                'parents': row[GetFromPandaIds.PARENTS].read(),
                'runlumi': row[GetFromPandaIds.RUNLUMI].read(),
                'events': row[GetFromPandaIds.INEVENTS],
            }

        yield res
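
A note on the structure this generator yields: the method builds a single dictionary with the lumi mask of the task plus the run/lumi metadata of the finished jobs, keyed by the stringified panda job id. The sketch below only illustrates that shape; every value in it is an invented placeholder, not real CRAB data.

# Hypothetical illustration of the dictionary yielded by report() above.
# Keys mirror the code; the concrete values are made up.
example_report = {
    'lumiMask': {'180001': [[1, 10]]},             # from buildLumiMask(runs, lumis)
    'runsAndLumis': {
        '12345': {                                 # stringified panda job id
            'parents': '["/store/parent.root"]',   # CLOB read into a string
            'runlumi': '{"180001": [1, 2, 3]}',    # CLOB read into a string
            'events': 1000,
        },
    },
}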
Example #2
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        """
        myThread = threading.currentThread()

        periodicInterval = kwargs.get("periodic_harvest_interval", 0)
        periodicSibling = kwargs.get("periodic_harvest_sibling", False)
        dqmHarvestUnit = kwargs.get("dqmHarvestUnit", "byRun")
        runs = kwargs.get("runs", None)
        lumis = kwargs.get("lumis", None)

        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        releasePeriodicJobDAO = daoFactory(
            classname="JobSplitting.ReleasePeriodicJob")
        periodicSiblingCompleteDAO = daoFactory(
            classname="JobSplitting.PeriodicSiblingComplete")

        fileset = self.subscription.getFileset()
        fileset.load()

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        if periodicInterval and periodicInterval > 0:

            # Trigger the Periodic Job if
            #  * it is the first job OR
            #  * the last job ended more than periodicInterval seconds ago
            triggerJob = releasePeriodicJobDAO.execute(
                subscription=self.subscription["id"], period=periodicInterval)

            if triggerJob:
                myThread.logger.debug("Creating Periodic harvesting job")
                self.createJobsLocationWise(fileset, False, dqmHarvestUnit,
                                            goodRunList)

        elif not fileset.open:

            # Trigger the EndOfRun job if
            #  * (same as Periodic to not have JobCreator go nuts and stop after the first iteration)
            #  * there is no Periodic sibling subscription OR
            #  * the Periodic sibling subscription is complete
            triggerJob = releasePeriodicJobDAO.execute(
                subscription=self.subscription["id"], period=3600)
            if triggerJob and periodicSibling:
                triggerJob = periodicSiblingCompleteDAO.execute(
                    subscription=self.subscription["id"])

            if triggerJob:
                myThread.logger.debug("Creating EndOfRun harvesting job")
                self.createJobsLocationWise(fileset, True, dqmHarvestUnit,
                                            goodRunList)

        return
Example #3
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        eventLimit      = int(kwargs.get('max_events_per_lumi', 20000))
        splitOnFile     = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC      = bool(kwargs.get('ignore_acdc_except', False))
        collectionName  = kwargs.get('collectionName', None)
        splitOnRun      = kwargs.get('splitOnRun', True)
        getParents      = kwargs.get('include_parents', False)
        runWhitelist    = kwargs.get('runWhitelist', [])
        runs            = kwargs.get('runs', None)
        lumis           = kwargs.get('lumis', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        deterministicPileup = kwargs.get('deterministicPileup', False)
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(workflow = self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL       = kwargs.get('couchURL')
                couchDB        = kwargs.get('couchDB')
                filesetName    = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner          = kwargs.get('owner')
                group          = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg =  "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg +=  "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg +=  "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return
Example #4
    def report(self, workflow, userdn, userproxy=None):
        res = {}
        self.logger.info("About to compute report of workflow: %s. Getting status first." % workflow)
        statusRes = self.status(workflow, userdn, userproxy)[0]

        #load the lumimask
        rows = self.api.query(None, None, ID.sql, taskname = workflow)
        splitArgs = literal_eval(rows.next()[6].read())
        res['lumiMask'] = buildLumiMask(splitArgs['runs'], splitArgs['lumis'])
        self.logger.info("Lumi mask was: %s" % res['lumiMask'])

        #extract the finished jobs from filemetadata
        jobids = [x[1] for x in statusRes['jobList'] if x[0] in ['finished', 'transferring']]
        rows = self.api.query(None, None, GetFromTaskAndType.sql, filetype='EDM', taskname=workflow)

        res['runsAndLumis'] = {}
        for row in rows:
            if row[GetFromTaskAndType.PANDAID] in jobids:
                res['runsAndLumis'][str(row[GetFromTaskAndType.PANDAID])] = { 'parents' : row[GetFromTaskAndType.PARENTS].read(),
                        'runlumi' : row[GetFromTaskAndType.RUNLUMI].read(),
                        'events'  : row[GetFromTaskAndType.INEVENTS],
                }
        self.logger.info("Got %s edm files for workflow %s" % (len(res), workflow))

        yield res
Example #5
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        """

        myThread = threading.currentThread()

        periodicInterval = kwargs.get("periodic_harvest_interval", 0)
        periodicSibling = kwargs.get("periodic_harvest_sibling", False)
        dqmHarvestUnit = kwargs.get("dqmHarvestUnit", "byRun")
        runs = kwargs.get("runs", None)
        lumis = kwargs.get("lumis", None)
        runWhitelist = set(kwargs.get('runWhitelist', []))
        runBlacklist = set(kwargs.get('runBlacklist', []))
        goodRunList = runWhitelist.difference(runBlacklist)

        daoFactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        releasePeriodicJobDAO = daoFactory(classname="JobSplitting.ReleasePeriodicJob")
        periodicSiblingCompleteDAO = daoFactory(classname="JobSplitting.PeriodicSiblingComplete")

        fileset = self.subscription.getFileset()
        fileset.load()

        lumiMask = {}
        if runs and lumis:
            lumiMask = buildLumiMask(runs, lumis)

        if periodicInterval and periodicInterval > 0:

            # Trigger the Periodic Job if
            #  * it is the first job OR
            #  * the last job ended more than periodicInterval seconds ago
            triggerJob = releasePeriodicJobDAO.execute(subscription=self.subscription["id"], period=periodicInterval)

            if triggerJob:
                myThread.logger.debug("Creating Periodic harvesting job")
                self.createJobsLocationWise(fileset, False, dqmHarvestUnit, lumiMask, goodRunList)

        elif not fileset.open:

            # Trigger the EndOfRun job if
            #  * (same as Periodic to not have JobCreator go nuts and stop after the first iteration)
            #  * there is no Periodic sibling subscription OR
            #  * the Periodic sibling subscription is complete
            triggerJob = releasePeriodicJobDAO.execute(subscription=self.subscription["id"], period=3600)
            if triggerJob and periodicSibling:
                triggerJob = periodicSiblingCompleteDAO.execute(subscription=self.subscription["id"])

            if triggerJob:
                myThread.logger.debug("Creating EndOfRun harvesting job")
                self.createJobsLocationWise(fileset, True, dqmHarvestUnit, lumiMask, goodRunList)

        return
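
For orientation, this variant keeps two separate filters: a plain run set obtained from the white/black lists and, independently, a lumi mask built only when both runs and lumis are given. The snippet below is a toy sketch of those two expressions with invented run numbers; buildLumiMask is imported from WMCore.WMSpec.WMTask as in the unit tests further down, so it assumes WMCore is importable.

from WMCore.WMSpec.WMTask import buildLumiMask

runWhitelist = set([165121, 165122, 165123])
runBlacklist = set([165122])
goodRunList = runWhitelist.difference(runBlacklist)   # {165121, 165123}

# Only built when both 'runs' and 'lumis' are supplied; expected to give
# {'165121': [[1, 50]]} for this toy input.
lumiMask = buildLumiMask(['165121'], ['1,50'])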
Example #6
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        myThread = threading.currentThread()

        lumisPerJob = int(kwargs.get('lumis_per_job', 1))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC = bool(kwargs.get('ignore_acdc_except', False))
        collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg += "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg += "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return
Example #7
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        myThread = threading.currentThread()

        lumisPerJob = int(kwargs.get('lumis_per_job', 1))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC = bool(kwargs.get('ignore_acdc_except', False))
        collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName,
                                                   owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg += "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg += "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return
Example #8
    def testBuildLumiMask(self):
        from WMCore.WMSpec.WMTask import buildLumiMask
        runs = ['3', '4']
        lumis = ['1,4,23,45', '5,84,234,445']
        expected = {'3': [[1, 4], [23, 45]], '4': [[5, 84], [234, 445]]}

        #working
        self.assertEqual(buildLumiMask(runs, lumis), expected, "buildLumiMask")

        #number of runs different than number of lumis
        runs = ['3']
        lumis = ['1,4,23,45', '5,84,234,445']
        self.assertRaises(ValueError, buildLumiMask, runs, lumis)

        #wrong format of the number of lumis
        runs = ['3', '4']
        lumis = ['1,4,23,45', '5,84,234']
        self.assertRaises(ValueError, buildLumiMask, runs, lumis)
Example #9
    def testBuildLumiMask(self):
        from WMCore.WMSpec.WMTask import buildLumiMask
        runs = ['3', '4']
        lumis = ['1,4,23,45', '5,84,234,445']
        expected = {'3': [[1, 4], [23, 45]], '4': [[5, 84], [234, 445]]}

        # working
        self.assertEqual(buildLumiMask(runs, lumis), expected, "buildLumiMask")

        # number of runs different than number of lumis
        runs = ['3']
        lumis = ['1,4,23,45', '5,84,234,445']
        self.assertRaises(ValueError, buildLumiMask, runs, lumis)

        # wrong format of the number of lumis
        runs = ['3', '4']
        lumis = ['1,4,23,45', '5,84,234']
        self.assertRaises(ValueError, buildLumiMask, runs, lumis)
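
Taken together, the two tests above pin down the contract of buildLumiMask: runs and lumis must have the same length, each lumi string must contain an even number of comma-separated values, and the result maps each run to a list of [first, last] ranges. The function below is a hypothetical re-implementation written only to make that contract explicit; the real helper lives in WMCore.WMSpec.WMTask.

def build_lumi_mask_sketch(runs, lumis):
    # Sketch inferred from the unit test expectations above, not the WMCore code.
    if len(runs) != len(lumis):
        raise ValueError("runs and lumis must have the same length")
    mask = {}
    for run, lumiString in zip(runs, lumis):
        points = [int(x) for x in lumiString.split(',')]
        if len(points) % 2 != 0:
            raise ValueError("lumi string '%s' does not contain start/end pairs" % lumiString)
        mask[run] = [[points[i], points[i + 1]] for i in range(0, len(points), 2)]
    return mask

assert build_lumi_mask_sketch(['3', '4'], ['1,4,23,45', '5,84,234,445']) == \
    {'3': [[1, 4], [23, 45]], '4': [[5, 84], [234, 445]]}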
Example #10
    def report(self, workflow, userdn, userproxy=None):
        res = {}
        self.logger.info("About to get output of workflow: %s. Getting status first." % workflow)
        statusRes = self.status(workflow, userdn, userproxy)[0]

        #load the lumimask
        rows = self.api.query(None, None, ID.sql, taskname = workflow)
        splitArgs = literal_eval(rows.next()[6].read())
        res['lumiMask'] = buildLumiMask(splitArgs['runs'], splitArgs['lumis'])

        #extract the finished jobs from filemetadata
        jobids = [x[1] for x in statusRes['jobList'] if x[0] in ['finished']]
        rows = self.api.query(None, None, GetFromPandaIds.sql, types='EDM', taskname=workflow, jobids=','.join(map(str,jobids)),\
                                        limit=len(jobids)*100)
        res['runsAndLumis'] = {}
        for row in rows:
            res['runsAndLumis'][str(row[GetFromPandaIds.PANDAID])] = { 'parents' : row[GetFromPandaIds.PARENTS].read(),
                    'runlumi' : row[GetFromPandaIds.RUNLUMI].read(),
                    'events'  : row[GetFromPandaIds.INEVENTS],
            }

        yield res
Example #11
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        myThread = threading.currentThread()

        lumisPerJob = int(kwargs.get('lumis_per_job', 1))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC = bool(kwargs.get('ignore_acdc_except', False))
        collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        deterministicPileup = kwargs.get('deterministicPileup', False)
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(
                classname="Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(
                workflow=self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner = kwargs.get('owner')
                group = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName,
                                                   owner, group)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg += "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg += "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return
Example #12
    def report(self, workflow, userdn, usedbs):
        """
        Computes the report for the workflow. If usedbs is True, also query DBS and return information about the input and output datasets.
        """

        def _compactLumis(datasetInfo):
            """ Help function that allow to convert from runLumis divided per file (result of listDatasetFileDetails)
                to an aggregated result.
            """
            lumilist = {}
            for file, info in datasetInfo.iteritems():
                for run, lumis in info['Lumis'].iteritems():
                    lumilist.setdefault(str(run), []).extend(lumis)
            return lumilist

        res = {}
        self.logger.info("About to compute report of workflow: %s with usedbs=%s. Getting status first." % (workflow,usedbs))
        statusRes = self.status(workflow, userdn)[0]

        #get the information we need from the taskdb/initialize variables
        row = self.api.query(None, None, self.Task.ID_sql, taskname = workflow).next()
        row = self.Task.ID_tuple(*row)
        inputDataset = row.input_dataset
        outputDatasets = literal_eval(row.output_dataset.read() if row.output_dataset else 'None')
        dbsUrl = row.dbs_url

        #load the lumimask
        splitArgs = literal_eval(row.split_args.read())
        res['lumiMask'] = buildLumiMask(splitArgs['runs'], splitArgs['lumis'])
        self.logger.info("Lumi mask was: %s" % res['lumiMask'])

        #extract the finished jobs from filemetadata
        jobids = [x[1] for x in statusRes['jobList'] if x[0] in ['finished']]
        rows = self.api.query(None, None, self.FileMetaData.GetFromTaskAndType_sql, filetype='EDM,TFILE,POOLIN', taskname=workflow)

        res['runsAndLumis'] = {}
        for row in rows:
            if row[GetFromTaskAndType.PANDAID] in jobids:
                if str(row[GetFromTaskAndType.PANDAID]) not in res['runsAndLumis']:
                    res['runsAndLumis'][str(row[GetFromTaskAndType.PANDAID])] = []
                res['runsAndLumis'][str(row[GetFromTaskAndType.PANDAID])].append( { 'parents' : row[GetFromTaskAndType.PARENTS].read(),
                        'runlumi' : row[GetFromTaskAndType.RUNLUMI].read(),
                        'events'  : row[GetFromTaskAndType.INEVENTS],
                        'type'    : row[GetFromTaskAndType.TYPE],
                })
        self.logger.info("Got %s edm files for workflow %s" % (len(res['runsAndLumis']), workflow))

        if usedbs:
            if not outputDatasets:
                raise ExecutionError("Cannot find any information about the output datasets names. You can try to execute 'crab report' with --dbs=no")
            try:
                #load the input dataset's lumilist
                dbs = DBSReader(dbsUrl)
                inputDetails = dbs.listDatasetFileDetails(inputDataset)
                res['dbsInLumilist'] = _compactLumis(inputDetails)
                self.logger.info("Aggregated input lumilist: %s" % res['dbsInLumilist'])
                #load the output datasets' lumilist
                res['dbsNumEvents'] = 0
                res['dbsNumFiles'] = 0
                res['dbsOutLumilist'] = {}
                dbs = DBSReader("https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader") #We can only publish here with DBS3
                outLumis = []
                for outputDataset in outputDatasets:
                    outputDetails = dbs.listDatasetFileDetails(outputDataset)
                    outLumis.append(_compactLumis(outputDetails))
                    res['dbsNumEvents'] += sum(x['NumberOfEvents'] for x in outputDetails.values())
                    res['dbsNumFiles'] += sum(len(x['Parents']) for x in outputDetails.values())

                outLumis = LumiList(runsAndLumis = outLumis).compactList
                for run,lumis in outLumis.iteritems():
                    res['dbsOutLumilist'][run] = reduce(lambda x1,x2: x1+x2, map(lambda x: range(x[0], x[1]+1), lumis))
                self.logger.info("Aggregated output lumilist: %s" % res['dbsOutLumilist'])
            except Exception as ex:
                msg = "Failed to contact DBS: %s" % str(ex)
                self.logger.exception(msg)
                raise ExecutionError("Exception while contacting DBS. Cannot get the input/output lumi lists. You can try to execute 'crab report' with --dbs=no")
Example #13
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split up all the available files such that each job will process a
        maximum of 'files_per_job'.  If the 'files_per_job' parameter is not
        passed in, jobs will process a maximum of 10 files.
        """

        filesPerJob   = int(kwargs.get("files_per_job", 10))
        jobsPerGroup  = int(kwargs.get("jobs_per_group", 0))
        totalFiles    = int(kwargs.get("total_files", 0))
        runs          = kwargs.get('runs', None)
        lumis         = kwargs.get('lumis', None)
        runBoundaries = kwargs.get("respect_run_boundaries", False)
        getParents    = kwargs.get("include_parents", False)
        filesInJob    = 0
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        #Get a dictionary of sites, files
        lDict = self.sortByLocation()
        locationDict = {}

        for key in lDict:
            newlist = []
            for f in lDict[key]:
                if runs and lumis:
                    ## Skip this file if it has no runs.
                    if len(f['runs']) == 0:
                        continue
                    f['lumiCount'] = 0
                    f['runs'] = sorted(f['runs'])
                    for run in f['runs']:
                        run.lumis.sort()
                        f['lumiCount'] += len(run.lumis)
                    f['lowestRun'] = f['runs'][0]
                    ## Skip this file if it has no lumis.
                    if f['lumiCount'] == 0:
                        continue
                    ## Do average event per lumi calculation.
                    f['avgEvtsPerLumi'] = round(float(f['events']) / f['lumiCount'])
                newlist.append(f)
            locationDict[key] = sorted(newlist, key = lambda f: f['lfn'])

        ## Make a list with all the files, sorting them by LFN. Remove from the list all
        ## the files filtered out by the lumi-mask (if there is one).
        files = []
        for filesPerLocSet in locationDict.values():
            for f in filesPerLocSet:
                files.append(f)
        if len(files):
            files = sorted(files, key = lambda f: f['lfn'])
            if runs and lumis:
                skippedFiles = []
                for f in files:
                    skipFile = True
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                continue
                            skipFile = False
                    if skipFile:
                        skippedFiles.append(f)
                for f in skippedFiles:
                    files.remove(f)

        ## Keep only the first totalFiles files. Remove the other files from the locationDict.
        if totalFiles > 0 and totalFiles < len(files):
            removedFiles = files[totalFiles:]
            files = files[:totalFiles]
            for f in removedFiles:
                for locSet in locationDict.keys():
                    if f in locationDict[locSet]:
                        locationDict[locSet].remove(f)

        for locSet in locationDict.keys():
            #Now we have all the files in a certain location set
            fileList = locationDict[locSet]
            filesInJob  = 0
            jobsInGroup = 0
            self.newGroup()
            if len(fileList) == 0:
                continue
            jobRun = None
            for f in fileList:
                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)
                fileRun = f.get('minrun', None)
                createNewJob = False
                if filesInJob == 0 or filesInJob == filesPerJob or (runBoundaries and fileRun != jobRun):
                    createNewJob = True
                if runs and lumis:
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        firstLumi = None
                        lastLumi = None
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                if firstLumi != None and lastLumi != None:
                                    self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                                    addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                    runAddedTime = addedEvents * timePerEvent
                                    runAddedSize = addedEvents * sizePerEvent
                                    self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                    firstLumi = None
                                    lastLumi = None
                                continue
                            if lastLumi != None and lumi != lastLumi + 1:
                                self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            if createNewJob:
                                if jobsPerGroup:
                                    if jobsInGroup > jobsPerGroup:
                                        self.newGroup()
                                        jobsInGroup = 0
                                self.newJob(name = self.getJobName())
                                self.currentJob.addResourceEstimates(memory = memoryRequirement)
                                filesInJob = 0
                                jobsInGroup += 1
                                jobRun = fileRun
                                createNewJob = False
                                self.currentJob.addFile(f)
                                filesInJob += 1
                            if firstLumi == None:
                                firstLumi = lumi
                            lastLumi = lumi
                            if self.currentJob and not f in self.currentJob['input_files']:
                                self.currentJob.addFile(f)
                                filesInJob += 1
                        if firstLumi != None and lastLumi != None:
                            self.currentJob['mask'].addRunAndLumis(run = run.run, lumis = [firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            firstLumi = None
                            lastLumi = None
                else:
                    if createNewJob:
                        if jobsPerGroup:
                            if jobsInGroup > jobsPerGroup:
                                self.newGroup()
                                jobsInGroup = 0
                        self.newJob(name = self.getJobName())
                        self.currentJob.addResourceEstimates(memory = memoryRequirement)
                        filesInJob = 0
                        jobsInGroup += 1
                        jobRun = fileRun
                    self.currentJob.addFile(f)
                    filesInJob += 1
                    fileTime = f['events'] * timePerEvent
                    fileSize = f['events'] * sizePerEvent
                    self.currentJob.addResourceEstimates(jobTime = fileTime, disk = fileSize)

        return
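
Most of the bookkeeping in the lumi-aware branch above is the firstLumi/lastLumi chain that turns individual good lumis into contiguous [first, last] ranges for the job mask. The helper below is a self-contained distillation of that pattern, not code from WMCore; the lumi numbers in the assertion are invented.

def contiguous_ranges(lumis):
    # Collapse a sorted list of lumi numbers into [first, last] ranges,
    # closing a range whenever the chain of consecutive lumis breaks.
    ranges, first, last = [], None, None
    for lumi in lumis:
        if last is not None and lumi != last + 1:
            ranges.append([first, last])
            first = None
        if first is None:
            first = lumi
        last = lumi
    if first is not None:
        ranges.append([first, last])
    return ranges

assert contiguous_ranges([1, 2, 3, 7, 8, 10]) == [[1, 3], [7, 8], [10, 10]]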
Example #14
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        jobLimit = int(kwargs.get('job_limit', 0))
        jobTimeLimit = int(
            kwargs.get('job_time_limit', self.defaultJobTimeLimit))
        totalEvents = int(kwargs.get('total_events', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', False))
        self.collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        deterministicPileup = kwargs.get('deterministicPileup', False)

        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))

        eventsPerLumiInDataset = 0

        if avgEventsPerJob <= 0:
            msg = "events_per_job parameter must be positive. Its value is: %d" % avgEventsPerJob
            raise RuntimeError(msg)

        if self.package == 'WMCore.WMBS':
            self.loadRunLumi = self.daoFactory(
                classname="Files.GetBulkRunLumi")
            if deterministicPileup:
                getJobNumber = self.daoFactory(
                    classname="Jobs.GetNumberOfJobsPerWorkflow")
                self.nJobs = getJobNumber.execute(
                    workflow=self.subscription.getWorkflow().id)
                logging.info(
                    'Creating jobs in DeterministicPileup mode for %s',
                    self.subscription.workflowName())

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if self.collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')

                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(self.collectionName,
                                                   filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList. "
                msg += "Refusing to create any jobs.\nDetails: %s" % str(ex)
                logging.exception(msg)
                return

        lDict = self.getFilesSortedByLocation(avgEventsPerJob)
        if not lDict:
            logging.info(
                "There are not enough events/files to be splitted. Trying again next cycle"
            )
            return

        locationDict = {}
        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.loadRunLumi:
                fileLumis = self.loadRunLumi.execute(files=lDict[key])
                if not fileLumis:
                    logging.warning(
                        "Empty fileLumis dict for workflow %s, subs %s.",
                        self.subscription.workflowName(),
                        self.subscription['id'])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run=Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(
                        float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)

            locationDict[key] = sorted(newlist,
                                       key=operator.itemgetter('lowestRun'))

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        lastRun = None
        lumisInJob = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        self.lumiChecker = LumiChecker(applyLumiCorrection)
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob = False
                # If the estimated job time is higher than the job time limit (condor limit)
                # and the file has only one lumi, then ditch that lumi
                timePerLumi = f['avgEvtsPerLumi'] * timePerEvent
                if timePerLumi > jobTimeLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    # Check the average number of events per lumi in this file
                    # Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        # If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        # Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    # Analyze how many events does this job already has
                    # Check how many we want as target, include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(
                        avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(
                            math.floor(
                                float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if (not isGoodLumi(goodRunList, run=run.run, lumi=lumi)
                                or self.lumiChecker.isSplitLumi(
                                    run.run, lumi, f)):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) *
                                           f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(
                                jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi is None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=lastRun, lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has a single lumi %s, in run %s " % (
                                    f['lfn'], lumi, run.run)
                                msg += "with too many events %d and it woud take %d sec to run" \
                                       % (f['events'], timePerLumi)
                            self.lumiChecker.closeJob(self.currentJob)
                            self.newJob(name=self.getJobName(),
                                        failedJob=failNextJob,
                                        failedReason=msg)
                            if deterministicPileup:
                                skipEvents = (
                                    self.nJobs -
                                    1) * lumisPerJob * eventsPerLumiInDataset
                                self.currentJob.addBaggageParameter(
                                    "skipPileupEvents", skipEvents)
                            self.currentJob.addResourceEstimates(
                                memory=memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1
                            if jobLimit and totalJobs > jobLimit:
                                msg = "Job limit of {0} jobs exceeded.".format(
                                    jobLimit)
                                raise RuntimeError(msg)

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                # Then we were carrying from a previous file
                                # Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(
                                        avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)),
                                                      1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob[
                                'input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(
                            run=run.run, lumis=[firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) *
                                       f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(
                            jobTime=runAddedTime, disk=runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f[
                        'avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
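
Two small calculations drive this splitter: the number of lumis per job is derived from the per-file event density, and in deterministic pileup mode each new job skips the pileup events already consumed by earlier jobs. The numbers below are invented and only restate those two expressions outside the class.

import math

# Worked example with invented numbers.
avgEventsPerJob = 5000
avgEvtsPerLumi = 300.0
lumisPerJob = max(int(math.floor(avgEventsPerJob / avgEvtsPerLumi)), 1)   # 16

nJobs = 4                                   # stands in for self.nJobs
eventsPerLumiInDataset = avgEvtsPerLumi     # assumed uniform across the dataset
skipPileupEvents = (nJobs - 1) * lumisPerJob * eventsPerLumiInDataset     # 14400.0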
Example #15
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        eventLimit      = int(kwargs.get('max_events_per_lumi', 20000))
        totalEvents     = int(kwargs.get('total_events', 0))
        splitOnFile     = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC      = bool(kwargs.get('ignore_acdc_except', False))
        collectionName  = kwargs.get('collectionName', None)
        splitOnRun      = kwargs.get('splitOnRun', True)
        getParents      = kwargs.get('include_parents', False)
        runWhitelist    = kwargs.get('runWhitelist', [])
        runs            = kwargs.get('runs', None)
        lumis           = kwargs.get('lumis', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        deterministicPileup = kwargs.get('deterministicPileup', False)
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(workflow = self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL       = kwargs.get('couchURL')
                couchDB        = kwargs.get('couchDB')
                filesetName    = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner          = kwargs.get('owner')
                group          = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg =  "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg +=  "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg +=  "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return

        lDict = self.sortByLocation()
        locationDict = {}

        # First we need to load the data
        if self.package == 'WMCore.WMBS':
            loadRunLumi = self.daoFactory(classname = "Files.GetBulkRunLumi")

        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.package == 'WMCore.WMBS':
                fileLumis = loadRunLumi.execute(files = lDict[key])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run = Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                #Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events'])/f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    #No lumis in the file, ignore it
                    continue
                newlist.append(f)


            locationDict[key] = sorted(newlist, key=operator.itemgetter('lowestRun'))

        totalJobs      = 0
        lastLumi       = None
        firstLumi      = None
        lastRun        = None
        lumisInJob     = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob          = False
                #If the number of events per lumi is higher than the limit
                #and it's only one lumi then ditch that lumi
                if f['avgEvtsPerLumi'] > eventLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    #Check the average number of events per lumi in this file
                    #Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        #If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        #Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    #Analyze how many events does this job already has
                    #Check how many we want as target, include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(math.floor(float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList = goodRunList, run = run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if not isGoodLumi(goodRunList, run = run.run, lumi = lumi):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                   lumis = [firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi == None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run = lastRun,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has too many events (%d) in %d lumi(s)" % (f['lfn'],
                                                                                          f['events'],
                                                                                          f['lumiCount'])
                            self.newJob(name = self.getJobName(), failedJob = failNextJob,
                                        failedReason = msg)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset)
                            self.currentJob.addResourceEstimates(memory = memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                #Then we were carrying from a previous file
                                #Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)), 1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                               lumis = [firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        return
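A minimal standalone sketch (not WMCore code; the function name and numbers below are illustrative) of the lumis-per-job arithmetic used by the splitter above: the target events per job is divided by the file's average events per lumi, never dropping below one lumi, and a zero-event file simply contributes all of its lumis.

import math

def lumis_per_job(avgEventsPerJob, avgEvtsPerLumi, lumiCount):
    """How many lumis fit in one job for a file with the given averages."""
    if avgEvtsPerLumi:
        # target events per job divided by events per lumi, never below one lumi
        return max(int(math.floor(float(avgEventsPerJob) / avgEvtsPerLumi)), 1)
    # zero-event file: take all of its lumis to avoid dividing by zero
    return lumiCount

print(lumis_per_job(5000, 300, 25))  # -> 16
print(lumis_per_job(5000, 0, 25))    # -> 25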
Example #16
    def report(self, workflow, userdn, usedbs):
        """
        Compute the report for the workflow. If usedbs is True, also query DBS and return information about the input and output datasets.
        """

        def _compactLumis(datasetInfo):
            """ Help function that allow to convert from runLumis divided per file (result of listDatasetFileDetails)
                to an aggregated result.
            """
            lumilist = {}
            for dummyfile, info in datasetInfo.iteritems():
                for run, lumis in info['Lumis'].iteritems():
                    lumilist.setdefault(str(run), []).extend(lumis)
            return lumilist

        res = {}
        self.logger.info("About to compute report of workflow: %s with usedbs=%s. Getting status first." % (workflow, usedbs))
        statusRes = self.status(workflow, userdn)[0]

        #get the information we need from the taskdb / initialize variables
        row = next(self.api.query(None, None, self.Task.ID_sql, taskname = workflow))
        row = self.Task.ID_tuple(*row)
        inputDataset = row.input_dataset
        outputDatasets = literal_eval(row.output_dataset.read() if row.output_dataset else 'None')
        dbsUrl = row.dbs_url

        #load the lumimask
        splitArgs = literal_eval(row.split_args.read())
        res['lumiMask'] = buildLumiMask(splitArgs['runs'], splitArgs['lumis'])
        self.logger.info("Lumi mask was: %s" % res['lumiMask'])

        #extract the finished jobs from filemetadata
        jobids = [x[1] for x in statusRes['jobList'] if x[0] in ['finished']]
        rows = self.api.query(None, None, self.FileMetaData.GetFromTaskAndType_sql, filetype='EDM,TFILE,POOLIN', taskname=workflow)

        res['runsAndLumis'] = {}
        for row in rows:
            if row[GetFromTaskAndType.PANDAID] in jobids:
                if str(row[GetFromTaskAndType.PANDAID]) not in res['runsAndLumis']:
                    res['runsAndLumis'][str(row[GetFromTaskAndType.PANDAID])] = []
                res['runsAndLumis'][str(row[GetFromTaskAndType.PANDAID])].append( { 'parents': row[GetFromTaskAndType.PARENTS].read(),
                        'runlumi': row[GetFromTaskAndType.RUNLUMI].read(),
                        'events': row[GetFromTaskAndType.INEVENTS],
                        'type': row[GetFromTaskAndType.TYPE],
                        'lfn': row[GetFromTaskAndType.LFN],
                })
        self.logger.info("Got %s edm files for workflow %s" % (len(res['runsAndLumis']), workflow))

        if usedbs:
            if not outputDatasets:
                raise ExecutionError("Cannot find any information about the output datasets names. You can try to execute 'crab report' with --dbs=no")
            try:
                #load the input dataset's lumilist
                dbs = DBSReader(dbsUrl)
                inputDetails = dbs.listDatasetFileDetails(inputDataset)
                res['dbsInLumilist'] = _compactLumis(inputDetails)
                self.logger.info("Aggregated input lumilist: %s" % res['dbsInLumilist'])
                #load the output datasets' lumilist
                res['dbsNumEvents'] = 0
                res['dbsNumFiles'] = 0
                res['dbsOutLumilist'] = {}
                dbs = DBSReader("https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader") #We can only publish here with DBS3
                outLumis = []
                for outputDataset in outputDatasets:
                    outputDetails = dbs.listDatasetFileDetails(outputDataset)
                    outLumis.append(_compactLumis(outputDetails))
                    res['dbsNumEvents'] += sum(x['NumberOfEvents'] for x in outputDetails.values())
                    res['dbsNumFiles'] += sum(len(x['Parents']) for x in outputDetails.values())

                outLumis = LumiList(runsAndLumis = outLumis).compactList
                for run, lumis in outLumis.iteritems():
                    res['dbsOutLumilist'][run] = reduce(lambda x1, x2: x1+x2, map(lambda x: range(x[0], x[1]+1), lumis))
                self.logger.info("Aggregated output lumilist: %s" % res['dbsOutLumilist'])
            except Exception as ex:
                msg = "Failed to contact DBS: %s" % str(ex)
                self.logger.exception(msg)
                raise ExecutionError("Exception while contacting DBS. Cannot get the input/output lumi lists. You can try to execute 'crab report' with --dbs=no")

        yield res
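The _compactLumis helper above can be exercised on its own; here is a standalone sketch, where the shape of the input dictionary ({lfn: {'Lumis': {run: [lumis]}}}) is an assumption inferred from how the code reads the result of listDatasetFileDetails.

def compact_lumis(datasetInfo):
    """Aggregate per-file run/lumi dictionaries into one {run: [lumis]} map."""
    lumilist = {}
    for dummyfile, info in datasetInfo.items():
        for run, lumis in info['Lumis'].items():
            lumilist.setdefault(str(run), []).extend(lumis)
    return lumilist

example = {
    '/store/fileA.root': {'Lumis': {1: [1, 2, 3]}},
    '/store/fileB.root': {'Lumis': {1: [4, 5], 2: [7]}},
}
print(compact_lumis(example))  # -> {'1': [1, 2, 3, 4, 5], '2': [7]}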
Example #17
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        myThread = threading.currentThread()

        lumisPerJob = int(kwargs.get('lumis_per_job', 1))
        totalLumis = int(kwargs.get('total_lumis', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC = bool(kwargs.get('ignore_acdc_except', False))
        collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        deterministicPileup = kwargs.get('deterministicPileup', False)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(workflow = self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg += "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg += "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return

        lDict = self.sortByLocation()
        locationDict = {}

        # First we need to load the data
        if self.package == 'WMCore.WMBS':
            loadRunLumi = self.daoFactory(classname = "Files.GetBulkRunLumi")

        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.package == 'WMCore.WMBS':
                fileLumis = loadRunLumi.execute(files = lDict[key])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run = Run(run, *lumiDict[run]))

            for f in lDict[key]:
                # if hasattr(f, 'loadData'):
                #    f.loadData()
                if len(f['runs']) == 0:
                    continue
                f['lumiCount'] = 0
                f['runs'] = sorted(f['runs'])
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]
                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)
            locationDict[key] = sorted(newlist, key = operator.itemgetter('lowestRun'))

        # Split files into jobs with each job containing
        # EXACTLY lumisPerJob number of lumis (except for maybe the last one)

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        stopJob = True
        stopTask = False
        lastRun = None
        lumisInJob = 0
        lumisInTask = 0
        self.lumiChecker = LumiChecker(applyLumiCorrection)
        for location in locationDict.keys():

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:
                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)

                if splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True

                for run in f['runs']:
                    if not isGoodRun(goodRunList = goodRunList, run = run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if (not isGoodLumi(goodRunList, run = run.run, lumi = lumi)
                                or self.lumiChecker.isSplitLumi(run.run, lumi, f)): # splitLumi checks if the lumi is split across jobs
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                       lumis = [firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime,
                                                                     disk = runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                   lumis = [firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime,
                                                                 disk = runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi == None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run = lastRun,
                                                                       lumis = [firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime,
                                                                     disk = runAddedSize)
                            self.lumiChecker.closeJob(self.currentJob) # before creating a new job add the lumis of the current one to the checker
                            self.newJob(name = self.getJobName())
                            self.currentJob.addResourceEstimates(memory = memoryRequirement)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset)
                            firstLumi = lumi
                            lumisInJob = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                        lumisInJob += 1
                        lumisInTask += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run

                        if self.currentJob and not f in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        if totalLumis > 0 and lumisInTask >= totalLumis:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                               lumis = [firstLumi, lastLumi])
                        addedEvents = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = addedEvents * timePerEvent
                        runAddedSize = addedEvents * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
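Stripped of locations, lumi masks, resource estimates and the LumiChecker correction, the grouping performed by this splitter reduces to packing lumis into jobs of exactly lumisPerJob lumis (except possibly the last one). A toy, self-contained sketch of that packing (not WMCore code):

def split_lumis(runs_and_lumis, lumisPerJob):
    """runs_and_lumis: {run: [lumi, ...]} -> list of jobs, each a list of (run, lumi)."""
    jobs, current = [], []
    for run in sorted(runs_and_lumis):
        for lumi in sorted(runs_and_lumis[run]):
            if len(current) == lumisPerJob:
                # job is full, start a new one
                jobs.append(current)
                current = []
            current.append((run, lumi))
    if current:
        jobs.append(current)
    return jobs

print(split_lumis({1: [1, 2, 3], 2: [1, 2]}, 2))
# -> [[(1, 1), (1, 2)], [(1, 3), (2, 1)], [(2, 2)]]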
Example #18
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split up all the available files such that each job will process a
        maximum of 'files_per_job'.  If the 'files_per_job' parameters is not
        passed in jobs will process a maximum of 10 files.
        """

        filesPerJob = int(kwargs.get("files_per_job", 10))
        jobsPerGroup = int(kwargs.get("jobs_per_group", 0))
        totalFiles = int(kwargs.get("total_files", 0))
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        runBoundaries = kwargs.get("respect_run_boundaries", False)
        getParents = kwargs.get("include_parents", False)
        filesInJob = 0
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        #Get a dictionary of sites, files
        lDict = self.sortByLocation()
        locationDict = {}

        for key in lDict:
            newlist = []
            for f in lDict[key]:
                if runs and lumis:
                    ## Skip this file if it has no runs.
                    if len(f['runs']) == 0:
                        continue
                    f['lumiCount'] = 0
                    f['runs'] = sorted(f['runs'])
                    for run in f['runs']:
                        run.lumis.sort()
                        f['lumiCount'] += len(run.lumis)
                    f['lowestRun'] = f['runs'][0]
                    ## Skip this file if it has no lumis.
                    if f['lumiCount'] == 0:
                        continue
                    ## Do average event per lumi calculation.
                    f['avgEvtsPerLumi'] = round(
                        float(f['events']) / f['lumiCount'])
                newlist.append(f)
            locationDict[key] = sorted(newlist, key=lambda f: f['lfn'])

        ## Make a list with all the files, sorting them by LFN. Remove from the list all
        ## the files filtered out by the lumi-mask (if there is one).
        files = []
        for filesPerLocSet in locationDict.values():
            for f in filesPerLocSet:
                files.append(f)
        if len(files):
            files = sorted(files, key=lambda f: f['lfn'])
            if runs and lumis:
                skippedFiles = []
                for f in files:
                    skipFile = True
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                continue
                            skipFile = False
                    if skipFile:
                        skippedFiles.append(f)
                for f in skippedFiles:
                    files.remove(f)

        ## Keep only the first totalFiles files. Remove the other files from the locationDict.
        if totalFiles > 0 and totalFiles < len(files):
            removedFiles = files[totalFiles:]
            files = files[:totalFiles]
            for f in removedFiles:
                for locSet in locationDict.keys():
                    if f in locationDict[locSet]:
                        locationDict[locSet].remove(f)

        for locSet in locationDict.keys():
            #Now we have all the files in a certain location set
            fileList = locationDict[locSet]
            filesInJob = 0
            jobsInGroup = 0
            self.newGroup()
            if len(fileList) == 0:
                continue
            jobRun = None
            for f in fileList:
                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)
                fileRun = f.get('minrun', None)
                createNewJob = False
                if filesInJob == 0 or filesInJob == filesPerJob or (
                        runBoundaries and fileRun != jobRun):
                    createNewJob = True
                if runs and lumis:
                    for run in f['runs']:
                        if not isGoodRun(goodRunList, run.run):
                            continue
                        firstLumi = None
                        lastLumi = None
                        for lumi in run:
                            if not isGoodLumi(goodRunList, run.run, lumi):
                                if firstLumi != None and lastLumi != None:
                                    self.currentJob['mask'].addRunAndLumis(
                                        run=run.run,
                                        lumis=[firstLumi, lastLumi])
                                    addedEvents = ((lastLumi - firstLumi + 1) *
                                                   f['avgEvtsPerLumi'])
                                    runAddedTime = addedEvents * timePerEvent
                                    runAddedSize = addedEvents * sizePerEvent
                                    self.currentJob.addResourceEstimates(
                                        jobTime=runAddedTime,
                                        disk=runAddedSize)
                                    firstLumi = None
                                    lastLumi = None
                                continue
                            if lastLumi != None and lumi != lastLumi + 1:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            if createNewJob:
                                if jobsPerGroup:
                                    if jobsInGroup > jobsPerGroup:
                                        self.newGroup()
                                        jobsInGroup = 0
                                self.newJob(name=self.getJobName())
                                self.currentJob.addResourceEstimates(
                                    memory=memoryRequirement)
                                filesInJob = 0
                                jobsInGroup += 1
                                jobRun = fileRun
                                createNewJob = False
                                self.currentJob.addFile(f)
                                filesInJob += 1
                            if firstLumi == None:
                                firstLumi = lumi
                            lastLumi = lumi
                            if self.currentJob and not f in self.currentJob[
                                    'input_files']:
                                self.currentJob.addFile(f)
                                filesInJob += 1
                        if firstLumi != None and lastLumi != None:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) *
                                           f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(
                                jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None
                else:
                    if createNewJob:
                        if jobsPerGroup:
                            if jobsInGroup > jobsPerGroup:
                                self.newGroup()
                                jobsInGroup = 0
                        self.newJob(name=self.getJobName())
                        self.currentJob.addResourceEstimates(
                            memory=memoryRequirement)
                        filesInJob = 0
                        jobsInGroup += 1
                        jobRun = fileRun
                    self.currentJob.addFile(f)
                    filesInJob += 1
                    fileTime = f['events'] * timePerEvent
                    fileSize = f['events'] * sizePerEvent
                    self.currentJob.addResourceEstimates(jobTime=fileTime,
                                                         disk=fileSize)

        return
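Ignoring run boundaries, job groups and the optional lumi mask, the core of this splitter is a plain files_per_job chunking. A minimal sketch (illustrative function name, not the library API):

def split_files(files, filesPerJob):
    """Chunk a list of files into jobs of at most filesPerJob files each."""
    return [files[i:i + filesPerJob] for i in range(0, len(files), filesPerJob)]

print(split_files(['f1', 'f2', 'f3', 'f4', 'f5'], 2))
# -> [['f1', 'f2'], ['f3', 'f4'], ['f5']]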
Example #19
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        eventLimit      = int(kwargs.get('max_events_per_lumi', 20000))
        totalEvents     = int(kwargs.get('total_events', 0))
        splitOnFile     = bool(kwargs.get('halt_job_on_file_boundaries', True))
        ignoreACDC      = bool(kwargs.get('ignore_acdc_except', False))
        collectionName  = kwargs.get('collectionName', None)
        splitOnRun      = kwargs.get('splitOnRun', True)
        getParents      = kwargs.get('include_parents', False)
        runWhitelist    = kwargs.get('runWhitelist', [])
        runs            = kwargs.get('runs', None)
        lumis           = kwargs.get('lumis', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
                    self.getPerformanceParameters(kwargs.get('performance', {}))
        capJobTime      = kwargs.get('capJobTime', None)
        capJobDisk      = kwargs.get('capJobDisk', None)
        deterministicPileup = kwargs.get('deterministicPileup', False)
        eventsPerLumiInDataset = 0

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname = "Jobs.GetNumberOfJobsPerWorkflow")
            jobNumber = getJobNumber.execute(workflow = self.subscription.getWorkflow().id)
            self.nJobs = jobNumber

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL       = kwargs.get('couchURL')
                couchDB        = kwargs.get('couchDB')
                filesetName    = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                owner          = kwargs.get('owner')
                group          = kwargs.get('group')

                logging.info('Creating jobs for ACDC fileset %s' % filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(collectionName, filesetName, owner, group)
            except Exception as ex:
                msg =  "Exception while trying to load goodRunList\n"
                if ignoreACDC:
                    msg +=  "Ditching goodRunList\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    goodRunList = {}
                else:
                    msg +=  "Refusing to create any jobs.\n"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                    logging.error(msg)
                    return

        lDict = self.sortByLocation()
        locationDict = {}

        # First we need to load the data
        if self.package == 'WMCore.WMBS':
            loadRunLumi = self.daoFactory(classname = "Files.GetBulkRunLumi")

        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.package == 'WMCore.WMBS':
                fileLumis = loadRunLumi.execute(files = lDict[key])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run = Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                #Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events'])/f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    #No lumis in the file, ignore it
                    continue
                newlist.append(f)


            locationDict[key] = sorted(newlist, key=operator.itemgetter('lowestRun'))

        totalJobs      = 0
        lastLumi       = None
        firstLumi      = None
        lastRun        = None
        lumisInJob     = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn = f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn = lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob          = False
                #If the file contains a single lumi whose average events exceed the limit,
                #create a one-lumi job for it and mark that job as failed
                if f['avgEvtsPerLumi'] > eventLimit and f['lumiCount'] == 1:
                    failNextJob = True
                    stopJob = True
                    lumisPerJob = 1
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    #Check the average number of events per lumi in this file
                    #Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        #If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        #Zero-event file: the ratio would blow up, so take all of its lumis instead
                        lumisPerJob = f['lumiCount']
                else:
                    #Check how many events this job already has and how many we want as a target,
                    #then include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(math.floor(float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList = goodRunList, run = run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if not isGoodLumi(goodRunList, run = run.run, lumi = lumi):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                if capJobTime or capJobDisk:
                                    self.currentJob.capResourceEstimates(jobTime = capJobTime, disk = capJobDisk)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                                   lumis = [firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                            if capJobTime or capJobDisk:
                                self.currentJob.capResourceEstimates(jobTime = capJobTime, disk = capJobDisk)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi == None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run = lastRun,
                                                                       lumis = [firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                                if capJobTime or capJobDisk:
                                    self.currentJob.capResourceEstimates(jobTime = capJobTime, disk = capJobDisk)
                            msg = None
                            if failNextJob:
                                msg = "File %s has too many events (%d) in %d lumi(s)" % (f['lfn'],
                                                                                          f['events'],
                                                                                          f['lumiCount'])
                            self.newJob(name = self.getJobName(), failedJob = failNextJob,
                                        failedReason = msg)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset)
                            self.currentJob.addResourceEstimates(memory = memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                #Then we were carrying from a previous file
                                #Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)), 1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and not f in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run = run.run,
                                                               lumis = [firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime = runAddedTime, disk = runAddedSize)
                        if capJobTime or capJobDisk:
                            self.currentJob.capResourceEstimates(jobTime = capJobTime, disk = capJobDisk)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        return
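Each time a [firstLumi, lastLumi] range is added to the mask above, the job's time and disk estimates grow by the number of events in that range scaled by the performance parameters (with the optional cap applied afterwards). The arithmetic in isolation, with invented numbers:

def range_estimates(firstLumi, lastLumi, avgEvtsPerLumi, timePerEvent, sizePerEvent):
    """Extra job time and disk contributed by one contiguous lumi range."""
    eventsAdded = (lastLumi - firstLumi + 1) * avgEvtsPerLumi
    return eventsAdded * timePerEvent, eventsAdded * sizePerEvent

jobTime, disk = range_estimates(11, 20, 300, 12.0, 512)
print(jobTime)  # 10 lumis * 300 events * 12.0 -> 36000.0
print(disk)     # 10 lumis * 300 events * 512  -> 1536000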
Example #20
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        lumisPerJob = int(kwargs.get('lumis_per_job', 1))
        totalLumis = int(kwargs.get('total_lumis', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', False))
        self.collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        deterministicPileup = kwargs.get('deterministicPileup', False)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        eventsPerLumiInDataset = 0

        if self.package == 'WMCore.WMBS':
            self.loadRunLumi = self.daoFactory(
                classname="Files.GetBulkRunLumi")
            if deterministicPileup:
                getJobNumber = self.daoFactory(
                    classname="Jobs.GetNumberOfJobsPerWorkflow")
                self.nJobs = getJobNumber.execute(
                    workflow=self.subscription.getWorkflow().id)

        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)

        # If we have runLumi info, we need to load it from couch
        if self.collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')

                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(self.collectionName,
                                                   filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList. "
                msg += "Refusing to create any jobs.\nDetails: %s" % str(ex)
                logging.exception(msg)
                return

        lDict = self.getFilesSortedByLocation(lumisPerJob)
        if not lDict:
            logging.info(
                "There are not enough lumis/files to be splitted. Trying again next cycle"
            )
            return
        locationDict = {}
        for key in lDict.keys():
            newlist = []
            for f in lDict[key]:
                # if hasattr(f, 'loadData'):
                #    f.loadData()
                if len(f['runs']) == 0:
                    continue
                f['lumiCount'] = 0
                f['runs'] = sorted(f['runs'])
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]
                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(
                        float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)
            locationDict[key] = sorted(newlist,
                                       key=operator.itemgetter('lowestRun'))

        # Split files into jobs with each job containing
        # EXACTLY lumisPerJob number of lumis (except for maybe the last one)

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        stopJob = True
        stopTask = False
        lastRun = None
        lumisInJob = 0
        lumisInTask = 0
        self.lumiChecker = LumiChecker(applyLumiCorrection)
        for location in locationDict.keys():

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:
                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and not run.run in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        # splitLumi checks if the lumi is split across jobs
                        if (not isGoodLumi(goodRunList, run=run.run, lumi=lumi)
                                or self.lumiChecker.isSplitLumi(
                                    run.run, lumi, f)):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=run.run, lumis=[firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # You have to kill the lumi chain if they're not continuous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(
                                run=run.run, lumis=[firstLumi, lastLumi])
                            addedEvents = ((lastLumi - firstLumi + 1) *
                                           f['avgEvtsPerLumi'])
                            runAddedTime = addedEvents * timePerEvent
                            runAddedSize = addedEvents * sizePerEvent
                            self.currentJob.addResourceEstimates(
                                jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi is None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(
                                    run=lastRun, lumis=[firstLumi, lastLumi])
                                addedEvents = ((lastLumi - firstLumi + 1) *
                                               f['avgEvtsPerLumi'])
                                runAddedTime = addedEvents * timePerEvent
                                runAddedSize = addedEvents * sizePerEvent
                                self.currentJob.addResourceEstimates(
                                    jobTime=runAddedTime, disk=runAddedSize)
                            # before creating a new job add the lumis of the current one to the checker
                            self.lumiChecker.closeJob(self.currentJob)
                            self.newJob(name=self.getJobName())
                            self.currentJob.addResourceEstimates(
                                memory=memoryRequirement)
                            if deterministicPileup:
                                skipEvents = (
                                    self.nJobs -
                                    1) * lumisPerJob * eventsPerLumiInDataset
                                self.currentJob.addBaggageParameter(
                                    "skipPileupEvents", skipEvents)
                            firstLumi = lumi
                            lumisInJob = 0
                            totalJobs += 1

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                        lumisInJob += 1
                        lumisInTask += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run

                        if self.currentJob and not f in self.currentJob[
                                'input_files']:
                            self.currentJob.addFile(f)

                        if totalLumis > 0 and lumisInTask >= totalLumis:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(
                            run=run.run, lumis=[firstLumi, lastLumi])
                        addedEvents = ((lastLumi - firstLumi + 1) *
                                       f['avgEvtsPerLumi'])
                        runAddedTime = addedEvents * timePerEvent
                        runAddedSize = addedEvents * sizePerEvent
                        self.currentJob.addResourceEstimates(
                            jobTime=runAddedTime, disk=runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
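The skipPileupEvents baggage parameter set above in deterministic-pileup mode simply offsets each job past the pileup events consumed by the jobs created before it. A small sketch (variable names follow the code above; the numbers are invented):

def skip_pileup_events(nJobs, lumisPerJob, eventsPerLumiInDataset):
    """Pileup events a freshly created job should skip in deterministic mode."""
    # nJobs already includes the job being created, hence the -1
    return (nJobs - 1) * lumisPerJob * eventsPerLumiInDataset

print(skip_pileup_events(3, 8, 250))  # third job skips 2 * 8 * 250 = 4000 events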
Example #21
    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        Split files into a number of lumis per job
        Allow a flag to determine if we split files between jobs
        """

        avgEventsPerJob = int(kwargs.get('events_per_job', 5000))
        jobLimit = int(kwargs.get('job_limit', 0))
        jobTimeLimit = int(kwargs.get('job_time_limit', self.defaultJobTimeLimit))
        totalEvents = int(kwargs.get('total_events', 0))
        splitOnFile = bool(kwargs.get('halt_job_on_file_boundaries', False))
        self.collectionName = kwargs.get('collectionName', None)
        splitOnRun = kwargs.get('splitOnRun', True)
        getParents = kwargs.get('include_parents', False)
        runWhitelist = kwargs.get('runWhitelist', [])
        runs = kwargs.get('runs', None)
        lumis = kwargs.get('lumis', None)
        applyLumiCorrection = bool(kwargs.get('applyLumiCorrection', False))
        deterministicPileup = kwargs.get('deterministicPileup', False)
        allowCreationFailure = kwargs.get('allowCreationFailure', True)

        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))

        eventsPerLumiInDataset = 0

        if avgEventsPerJob <= 0:
            msg = "events_per_job parameter must be positive. Its value is: %d" % avgEventsPerJob
            raise RuntimeError(msg)

        if self.package == 'WMCore.WMBS':
            self.loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
            if deterministicPileup:
                getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
                self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
                logging.info('Creating jobs in DeterministicPileup mode for %s',
                             self.subscription.workflowName())

        goodRunList = {}
        if runs and lumis:
            goodRunList = buildLumiMask(runs, lumis)
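            # The mask maps run numbers (as strings) to inclusive [first, last]
            # lumi ranges, e.g. {'3': [[1, 4], [23, 45]]}; isGoodRun/isGoodLumi
            # below filter runs and lumis against it (an empty mask accepts all).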

        # If we have runLumi info, we need to load it from couch
        if self.collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')

                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                goodRunList = dcs.getLumiWhitelist(self.collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList. "
                msg += "Refusing to create any jobs.\nDetails: %s" % str(ex)
                logging.exception(msg)
                return

        lDict = self.getFilesSortedByLocation(avgEventsPerJob)
        if not lDict:
            logging.info("There are not enough events/files to be splitted. Trying again next cycle")
            return

        locationDict = {}
        for key in lDict.keys():
            newlist = []
            # First we need to load the data
            if self.loadRunLumi:
                fileLumis = self.loadRunLumi.execute(files=lDict[key])
                if not fileLumis:
                    logging.warning("Empty fileLumis dict for workflow %s, subs %s.",
                                    self.subscription.workflowName(), self.subscription['id'])
                for f in lDict[key]:
                    lumiDict = fileLumis.get(f['id'], {})
                    for run in lumiDict.keys():
                        f.addRun(run=Run(run, *lumiDict[run]))

            for f in lDict[key]:
                if len(f['runs']) == 0:
                    continue
                f['runs'] = sorted(f['runs'])
                f['lumiCount'] = 0
                for run in f['runs']:
                    run.lumis.sort()
                    f['lumiCount'] += len(run.lumis)
                f['lowestRun'] = f['runs'][0]

                # Do average event per lumi calculation
                if f['lumiCount']:
                    f['avgEvtsPerLumi'] = round(float(f['events']) / f['lumiCount'])
                    if deterministicPileup:
                        # We assume that all lumis are equal in the dataset
                        eventsPerLumiInDataset = f['avgEvtsPerLumi']
                else:
                    # No lumis in the file, ignore it
                    continue
                newlist.append(f)

            locationDict[key] = sorted(newlist, key=operator.itemgetter('lowestRun'))

        totalJobs = 0
        lastLumi = None
        firstLumi = None
        lastRun = None
        lumisInJob = 0
        totalAvgEventCount = 0
        currentJobAvgEventCount = 0
        stopTask = False
        self.lumiChecker = LumiChecker(applyLumiCorrection)
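        # LumiChecker records the lumis added to each closed job; when
        # applyLumiCorrection is enabled it spots lumis split across jobs/files
        # (isSplitLumi) and fixInputFiles patches the affected jobs at the end.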
        for location in locationDict:

            # For each location, we need a new jobGroup
            self.newGroup()
            stopJob = True
            for f in locationDict[location]:

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                lumisInJobInFile = 0
                updateSplitOnJobStop = False
                failNextJob = False
                # If the estimated time per lumi exceeds the job time limit (condor limit)
                # and the file has only one lumi, then ditch that lumi
                timePerLumi = f['avgEvtsPerLumi'] * timePerEvent
                if timePerLumi > jobTimeLimit and f['lumiCount'] == 1:
                    lumisPerJob = 1
                    stopJob = True
                    if allowCreationFailure:
                        failNextJob = True
                elif splitOnFile:
                    # Then we have to split on every boundary
                    stopJob = True
                    # Check the average number of events per lumi in this file
                    # Adapt the lumis per job to match the target conditions
                    if f['avgEvtsPerLumi']:
                        # If there are events in the file
                        ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                        lumisPerJob = max(int(math.floor(ratio)), 1)
                    else:
                        # Zero event file, then the ratio goes to infinity. Computers don't like that
                        lumisPerJob = f['lumiCount']
                else:
                    # Check how many events this job already has and how many we
                    # want as a target; include as many lumi sections as possible
                    updateSplitOnJobStop = True
                    eventsRemaining = max(avgEventsPerJob - currentJobAvgEventCount, 0)
                    if f['avgEvtsPerLumi']:
                        lumisAllowed = int(math.floor(float(eventsRemaining) / f['avgEvtsPerLumi']))
                    else:
                        lumisAllowed = f['lumiCount']
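                    # The cap below is cumulative: it counts the lumis already in
                    # the current job plus those this file can still contribute.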
                    lumisPerJob = max(lumisInJob + lumisAllowed, 1)

                for run in f['runs']:
                    if not isGoodRun(goodRunList=goodRunList, run=run.run):
                        # Then skip this one
                        continue
                    if len(runWhitelist) > 0 and run.run not in runWhitelist:
                        # Skip due to run whitelist
                        continue
                    firstLumi = None

                    if splitOnRun and run.run != lastRun:
                        # Then we need to kill this job and get a new one
                        stopJob = True

                    # Now loop over the lumis
                    for lumi in run:
                        if (not isGoodLumi(goodRunList, run=run.run, lumi=lumi) or
                                self.lumiChecker.isSplitLumi(run.run, lumi, f)):
                            # Kill the chain of good lumis
                            # Skip this lumi
                            if firstLumi != None and firstLumi != lumi:
                                self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                                       lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                                firstLumi = None
                                lastLumi = None
                            continue

                        # Close the current lumi range if the lumis are not contiguous
                        if lastLumi and not lumi == lastLumi + 1:
                            self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                                   lumis=[firstLumi, lastLumi])
                            eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                            runAddedTime = eventsAdded * timePerEvent
                            runAddedSize = eventsAdded * sizePerEvent
                            self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                            firstLumi = None
                            lastLumi = None

                        if firstLumi is None:
                            # Set the first lumi in the run
                            firstLumi = lumi

                        # If we're full, end the job
                        if lumisInJob == lumisPerJob:
                            stopJob = True
                        # Actually do the new job creation
                        if stopJob:
                            if firstLumi != None and lastLumi != None and lastRun != None:
                                self.currentJob['mask'].addRunAndLumis(run=lastRun,
                                                                       lumis=[firstLumi, lastLumi])
                                eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                                runAddedTime = eventsAdded * timePerEvent
                                runAddedSize = eventsAdded * sizePerEvent
                                self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                            msg = None
                            if failNextJob:
                                msg = "File %s has a single lumi %s, in run %s " % (f['lfn'], lumi, run.run)
                                msg += "with too many events %d and it woud take %d sec to run" \
                                       % (f['events'], timePerLumi)
                            self.lumiChecker.closeJob(self.currentJob)
                            self.newJob(name=self.getJobName(), failedJob=failNextJob, failedReason=msg)
                            if deterministicPileup:
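                                # Skip the pileup events already consumed by the
                                # previous jobs, assuming a constant number of
                                # events per lumi in the pileup dataset.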
                                skipEvents = (self.nJobs - 1) * lumisPerJob * eventsPerLumiInDataset
                                self.currentJob.addBaggageParameter("skipPileupEvents", skipEvents)
                            self.currentJob.addResourceEstimates(memory=memoryRequirement)
                            failNextJob = False
                            firstLumi = lumi
                            lumisInJob = 0
                            lumisInJobInFile = 0
                            currentJobAvgEventCount = 0
                            totalJobs += 1
                            if jobLimit and totalJobs > jobLimit:
                                msg = "Job limit of {0} jobs exceeded.".format(jobLimit)
                                raise RuntimeError(msg)

                            # Add the file to new jobs
                            self.currentJob.addFile(f)

                            if updateSplitOnJobStop:
                                # Then we were carrying from a previous file
                                # Reset calculations for this file
                                updateSplitOnJobStop = False
                                if f['avgEvtsPerLumi']:
                                    ratio = float(avgEventsPerJob) / f['avgEvtsPerLumi']
                                    lumisPerJob = max(int(math.floor(ratio)), 1)
                                else:
                                    lumisPerJob = f['lumiCount']

                        lumisInJob += 1
                        lumisInJobInFile += 1
                        lastLumi = lumi
                        stopJob = False
                        lastRun = run.run
                        totalAvgEventCount += f['avgEvtsPerLumi']

                        if self.currentJob and f not in self.currentJob['input_files']:
                            self.currentJob.addFile(f)

                        # We stop here if there are more total events than requested.
                        if totalEvents > 0 and totalAvgEventCount >= totalEvents:
                            stopTask = True
                            break

                    if firstLumi != None and lastLumi != None:
                        # Add this run to the mask
                        self.currentJob['mask'].addRunAndLumis(run=run.run,
                                                               lumis=[firstLumi, lastLumi])
                        eventsAdded = ((lastLumi - firstLumi + 1) * f['avgEvtsPerLumi'])
                        runAddedTime = eventsAdded * timePerEvent
                        runAddedSize = eventsAdded * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=runAddedTime, disk=runAddedSize)
                        firstLumi = None
                        lastLumi = None

                    if stopTask:
                        break

                if not splitOnFile:
                    currentJobAvgEventCount += f['avgEvtsPerLumi'] * lumisInJobInFile

                if stopTask:
                    break

            if stopTask:
                break

        self.lumiChecker.closeJob(self.currentJob)
        self.lumiChecker.fixInputFiles()
        return
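
Both splitters gate every run and lumi through the goodRunList mask before packing lumis into jobs. For reference, here is a minimal, self-contained sketch of that filtering under the mask format produced by buildLumiMask; the helper names mirror isGoodRun/isGoodLumi, but this is an illustrative reimplementation rather than the WMCore code:

# Illustrative lumi-mask filtering; the mask format is assumed to match
# buildLumiMask: {'<run>': [[firstLumi, lastLumi], ...]} with string keys.

def is_good_run(good_run_list, run):
    """An empty mask accepts every run; otherwise the run must appear as a key."""
    return not good_run_list or str(run) in good_run_list

def is_good_lumi(good_run_list, run, lumi):
    """Accept the lumi if the mask is empty or the lumi falls inside any range."""
    if not good_run_list:
        return True
    return any(first <= lumi <= last
               for first, last in good_run_list.get(str(run), []))

if __name__ == "__main__":
    mask = {'3': [[1, 4], [23, 45]]}
    assert is_good_run(mask, 3)
    assert not is_good_run(mask, 7)
    assert is_good_lumi(mask, 3, 2)
    assert not is_good_lumi(mask, 3, 10)
    assert is_good_lumi({}, 7, 10)  # an empty mask accepts everything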