Python Run.Run примеры использования

Язык программирования: Python

Пространство имен/Пакет: WMCore.DataStructs.Run

Класс/Тип: Run

Метод/Функция: Run

Примеров на hotexamples.com: 30

Python Run.Run - 30 примеров найдено. Это лучшие примеры Python кода для WMCore.DataStructs.Run.Run.Run, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Run(30)

extend(9)

appendLumi(3)

Основные методы

Run (30)

extend (9)

appendLumi (3)

Пример #1

Показать файл

    def testB_NoFileSplitNoHardLimit(self):
        """
        _testB_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7, twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        # First test, the optimal settings are 360 events per job
        # As we have files with 0 events per lumi, this will configure the splitting to
        # a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 0, "There aren't enough events, so it should have 0 job groups")

        # we close this fileset to get it moving
        fileset = testSubscription.getFileset()
        fileset.markOpen(False)

        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100, "All 100 files must be in the job")
        self.assertEqual(jobs[0]['estimatedMemoryUsage'], 2300)
        self.assertEqual(jobs[0]['estimatedDiskUsage'], 0)
        self.assertEqual(jobs[0]['estimatedJobTime'], 0)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileset.create()
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "T2_CH_CERN")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "T2_CH_CERN")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "T2_CH_CERN")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "T2_CH_CERN")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "T2_CH_CERN")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "T2_CH_CERN")
        testFileG = self.createFile("/this/is/file7", 151, 6, 3, "T2_CH_CERN")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # Optimal settings are: jobs with 150 events per job
        # This means, the first file must be splitted in 3 lumis per job which would leave room
        # for another lumi in the second job, but the second file has a lumi too big for that
        # The 3rd job only contains the second file, the fourth and fifth job split the third file
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7, "7 jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0: [[0, 2]]}, "Wrong mask for the first job")
        self.assertEqual(jobs[0]["estimatedJobTime"], 150 * 12)
        self.assertEqual(jobs[0]["estimatedDiskUsage"], 150 * 400)
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0: [[3, 4]]}, "Wrong mask for the second job")
        self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1: [[1, 1]]}, "Wrong mask for the third job")
        self.assertEqual(jobs[2]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[2]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2: [[4, 4]]}, "Wrong mask for the fourth job")
        self.assertEqual(jobs[3]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[3]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2: [[5, 5]]}, "Wrong mask for the fifth job")
        self.assertEqual(jobs[4]["estimatedJobTime"], 600 * 12)
        self.assertEqual(jobs[4]["estimatedDiskUsage"], 600 * 400)
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(),
                         {3: [[3, 3]], 4: [[4, 4]], 5: [[5, 5]]}, "Wrong mask for the sixth job")
        self.assertEqual(jobs[5]["estimatedJobTime"], 140 * 12)
        self.assertEqual(jobs[5]["estimatedDiskUsage"], 140 * 400)
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6: [[18, 20]]}, "Wrong mask for the seventh job")
        self.assertEqual(jobs[6]["estimatedJobTime"], 150 * 12)
        self.assertEqual(jobs[6]["estimatedDiskUsage"], 150 * 400)

        for job in jobs:
            self.assertEqual(job["estimatedMemoryUsage"], 2300)
        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1a", size=1000,
                     events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("T1_US_FNAL_Disk")

        fileB = self.createFile('/this/is/file2a', 200, 3, 5, "T1_US_FNAL_Disk")

        testFileset = Fileset(name='FilesetB')
        testFileset.create()
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700,
                               performance=self.performanceParams)
        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[0]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[1]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[1]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[2]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[2]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[3]["estimatedJobTime"], 100 * 12)
        self.assertEqual(jobs[3]["estimatedDiskUsage"], 100 * 400)
        self.assertEqual(jobs[4]["estimatedJobTime"], 700 * 12)
        self.assertEqual(jobs[4]["estimatedDiskUsage"], 700 * 400)
        self.assertEqual(jobs[5]["estimatedJobTime"], 300 * 12)
        self.assertEqual(jobs[5]["estimatedDiskUsage"], 300 * 400)

Пример #2

Показать файл

    def stuffWMBS(self, workflowURL, name):
        """
        _stuffWMBS_

        Insert some dummy jobs, jobgroups, filesets, files and subscriptions
        into WMBS to test job creation.  Three completed job groups each
        containing several files are injected.  Another incomplete job group is
        also injected.  Also files are added to the "Mergeable" subscription as
        well as to the output fileset for their jobgroups.
        """
        locationAction = self.daoFactory(classname="Locations.New")
        locationAction.execute(siteName="s1", pnn="somese.cern.ch")

        mergeFileset = Fileset(name="mergeFileset")
        mergeFileset.create()
        bogusFileset = Fileset(name="bogusFileset")
        bogusFileset.create()

        mergeWorkflow = Workflow(spec=workflowURL,
                                 owner="mnorman",
                                 name=name,
                                 task="/TestWorkload/ReReco")
        mergeWorkflow.create()

        mergeSubscription = Subscription(fileset=mergeFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")
        mergeSubscription.create()
        dummySubscription = Subscription(fileset=bogusFileset,
                                         workflow=mergeWorkflow,
                                         split_algo="ParentlessMergeBySize")

        file1 = File(lfn="file1",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        file1.addRun(Run(1, *[45]))
        file1.create()
        file2 = File(lfn="file2",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        file2.addRun(Run(1, *[45]))
        file2.create()
        file3 = File(lfn="file3",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        file3.addRun(Run(1, *[45]))
        file3.create()
        file4 = File(lfn="file4",
                     size=1024,
                     events=1024,
                     first_event=3072,
                     locations={"somese.cern.ch"})
        file4.addRun(Run(1, *[45]))
        file4.create()

        fileA = File(lfn="fileA",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileA.addRun(Run(1, *[46]))
        fileA.create()
        fileB = File(lfn="fileB",
                     size=1024,
                     events=1024,
                     first_event=1024,
                     locations={"somese.cern.ch"})
        fileB.addRun(Run(1, *[46]))
        fileB.create()
        fileC = File(lfn="fileC",
                     size=1024,
                     events=1024,
                     first_event=2048,
                     locations={"somese.cern.ch"})
        fileC.addRun(Run(1, *[46]))
        fileC.create()

        fileI = File(lfn="fileI",
                     size=1024,
                     events=1024,
                     first_event=0,
                     locations={"somese.cern.ch"})
        fileI.addRun(Run(2, *[46]))
        fileI.create()
        fileII = File(lfn="fileII",
                      size=1024,
                      events=1024,
                      first_event=1024,
                      locations={"somese.cern.ch"})
        fileII.addRun(Run(2, *[46]))
        fileII.create()
        fileIII = File(lfn="fileIII",
                       size=1024,
                       events=1024,
                       first_event=2048,
                       locations={"somese.cern.ch"})
        fileIII.addRun(Run(2, *[46]))
        fileIII.create()
        fileIV = File(lfn="fileIV",
                      size=1024 * 1000000,
                      events=1024,
                      first_event=3072,
                      locations={"somese.cern.ch"})
        fileIV.addRun(Run(2, *[46]))
        fileIV.create()

        for fileObj in [
                file1, file2, file3, file4, fileA, fileB, fileC, fileI, fileII,
                fileIII, fileIV
        ]:
            mergeFileset.addFile(fileObj)
            bogusFileset.addFile(fileObj)

        mergeFileset.commit()
        bogusFileset.commit()

        return

Пример #3

Показать файл

Файл: DBSBufferFile_t.py Проект: prozober/WMCore

    def testGetParentLFNs(self):
        """
        _testGetParentLFNs_

        Create three files and set them to be parents of a fourth file.  Check
        to make sure that getParentLFNs() on the child file returns the correct
        LFNs.
        """
        testFileParentA = DBSBufferFile(lfn="/this/is/a/parent/lfnA",
                                        size=1024,
                                        events=20)
        testFileParentA.setAlgorithm(appName="cmsRun",
                                     appVer="CMSSW_2_1_8",
                                     appFam="RECO",
                                     psetHash="GIBBERISH",
                                     configContent="MOREGIBBERISH")
        testFileParentA.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileParentA.addRun(Run(1, *[45]))

        testFileParentB = DBSBufferFile(lfn="/this/is/a/parent/lfnB",
                                        size=1024,
                                        events=20)
        testFileParentB.setAlgorithm(appName="cmsRun",
                                     appVer="CMSSW_2_1_8",
                                     appFam="RECO",
                                     psetHash="GIBBERISH",
                                     configContent="MOREGIBBERISH")
        testFileParentB.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileParentB.addRun(Run(1, *[45]))

        testFileParentC = DBSBufferFile(lfn="/this/is/a/parent/lfnC",
                                        size=1024,
                                        events=20)
        testFileParentC.setAlgorithm(appName="cmsRun",
                                     appVer="CMSSW_2_1_8",
                                     appFam="RECO",
                                     psetHash="GIBBERISH",
                                     configContent="MOREGIBBERISH")
        testFileParentC.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileParentC.addRun(Run(1, *[45]))

        testFileParentA.create()
        testFileParentB.create()
        testFileParentC.create()

        testFile = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10)
        testFile.setAlgorithm(appName="cmsRun",
                              appVer="CMSSW_2_1_8",
                              appFam="RECO",
                              psetHash="GIBBERISH",
                              configContent="MOREGIBBERISH")
        testFile.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFile.addRun(Run(1, *[45]))
        testFile.create()

        testFile.addParents([
            testFileParentA["lfn"], testFileParentB["lfn"],
            testFileParentC["lfn"]
        ])

        parentLFNs = testFile.getParentLFNs()

        assert len(parentLFNs) == 3, \
               "ERROR: Child does not have the right amount of parents"

        goldenLFNs = [
            "/this/is/a/parent/lfnA", "/this/is/a/parent/lfnB",
            "/this/is/a/parent/lfnC"
        ]
        for parentLFN in parentLFNs:
            assert parentLFN in goldenLFNs, \
                   "ERROR: Unknown parent lfn"
            goldenLFNs.remove(parentLFN)

        testFile.delete()
        testFileParentA.delete()
        testFileParentB.delete()
        testFileParentC.delete()
        return

Пример #4

Показать файл

Файл: LoadForErrorHandler.py Проект: panwarlsweet/WMCore

    def execute(self, jobID, fileSelection=None, conn=None, transaction=False):
        """
        _execute_

        Execute the SQL for the given job ID and then format and return
        the result.
        """

        if type(jobID) == list:
            if len(jobID) < 1:
                # Nothing to do
                return []
            binds = jobID
        else:
            binds = {"jobid": jobID}

        result = self.dbi.processData(self.sql,
                                      binds,
                                      conn=conn,
                                      transaction=transaction)

        jobList = self.formatJobs(result)

        filesResult = self.dbi.processData(self.fileSQL,
                                           binds,
                                           conn=conn,
                                           transaction=transaction)
        fileList = self.formatDict(filesResult)
        fileBinds = []
        if fileSelection:
            fileList = [
                x for x in fileList if x['lfn'] in fileSelection[x['jobid']]
            ]
        for x in fileList:
            # Add new runs
            x['newRuns'] = []
            # Assemble unique list of binds
            if not {'fileid': x['id']} in fileBinds:
                fileBinds.append({'fileid': x['id']})

        parentList = []
        if len(fileBinds) > 0:
            parentResult = self.dbi.processData(self.parentSQL,
                                                fileBinds,
                                                conn=conn,
                                                transaction=transaction)
            parentList = self.formatDict(parentResult)

            lumiResult = self.dbi.processData(self.runLumiSQL,
                                              fileBinds,
                                              conn=conn,
                                              transaction=transaction)
            lumiList = self.formatDict(lumiResult)
            lumiDict = {}
            for l in lumiList:
                if not l['fileid'] in lumiDict.keys():
                    lumiDict[l['fileid']] = []
                lumiDict[l['fileid']].append(l)

            for f in fileList:
                fileRuns = {}
                if f['id'] in lumiDict.keys():
                    for l in lumiDict[f['id']]:
                        run = l['run']
                        lumi = l['lumi']
                        numEvents = l['num_events']
                        fileRuns.setdefault(run, [])
                        fileRuns[run].append((lumi, numEvents))

                for r in fileRuns.keys():
                    newRun = Run(runNumber=r)
                    newRun.lumis = fileRuns[r]
                    f['newRuns'].append(newRun)

        filesForJobs = {}
        for f in fileList:
            jobid = f['jobid']
            if not jobid in filesForJobs.keys():
                filesForJobs[jobid] = {}
            if f['id'] not in filesForJobs[jobid].keys():
                wmbsFile = File(id=f['id'])
                wmbsFile.update(f)
                wmbsFile['locations'].add(f['pnn'])
                for r in wmbsFile['newRuns']:
                    wmbsFile.addRun(r)
                for entry in parentList:
                    if entry['id'] == f['id']:
                        wmbsFile['parents'].add(entry['lfn'])
                filesForJobs[jobid][f['id']] = wmbsFile
            else:
                # If the file is there, just add the location
                filesForJobs[jobid][f['id']]['locations'].add(f['pnn'])

        for j in jobList:
            if j['id'] in filesForJobs.keys():
                j['input_files'] = filesForJobs[j['id']].values()

        return jobList

Пример #5

Показать файл

    def createFilesWithChildren(self, moreParentFiles, acqEra):
        """
        _createFilesWithChildren_

        Create several parentless files and then create child files.
        """
        parentFiles = []
        childFiles = []

        baseLFN = "/store/data/%s/Cosmics/RAW/v1/000/143/316/" % acqEra
        for i in range(10):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root", size=1024,
                                     events=20, checksums={"cksum": 1})
            testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                  appFam="RAW", psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RAW" % acqEra)

            testFile['block_close_max_wait_time'] = 1000000
            testFile['block_close_max_events'] = 1000000
            testFile['block_close_max_size'] = 1000000
            testFile['block_close_max_files'] = 1000000

            lumis = []
            for j in range(10):
                lumis.append((i * 10) + j)
            testFile.addRun(Run(143316, *lumis))

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            parentFiles.append(testFile)

        baseLFN = "/store/data/%s/Cosmics/RECO/v1/000/143/316/" % acqEra
        for i in range(5):
            testFile = DBSBufferFile(lfn=baseLFN + makeUUID() + ".root", size=1024,
                                     events=20, checksums={"cksum": 1})
            testFile.setAlgorithm(appName="cmsRun", appVer="CMSSW_3_1_1",
                                  appFam="RECO", psetHash="GIBBERISH",
                                  configContent="MOREGIBBERISH")
            testFile.setDatasetPath("/Cosmics/%s-v1/RECO" % acqEra)

            testFile['block_close_max_wait_time'] = 1000000
            testFile['block_close_max_events'] = 1000000
            testFile['block_close_max_size'] = 1000000
            testFile['block_close_max_files'] = 1000000

            lumis = []
            for j in range(20):
                lumis.append((i * 20) + j)
            testFile.addRun(Run(143316, *lumis))

            testFile.setAcquisitionEra(acqEra)
            testFile.setProcessingVer("1")
            testFile.setGlobalTag("START54::All")
            testFile.create()
            testFile.setLocation("malpaquet")
            testFile.addParents([parentFiles[i * 2]["lfn"],
                                 parentFiles[i * 2 + 1]["lfn"]])
            testFile.addParents([moreParentFiles[i * 2]["lfn"],
                                 moreParentFiles[i * 2 + 1]["lfn"]])
            childFiles.append(testFile)

        return parentFiles, childFiles

Пример #6

Показать файл

Файл: FixedDelay_t.py Проект: tsarangi/WMCore

    def setUp(self):
        """
        _setUp_

        Create two subscriptions: One that contains a single file and one that
        contains multiple files.
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        self.testInit.setSchema(customModules=["WMCore.WMBS"],
                                useDefault=False)

        myThread = threading.currentThread()
        daofactory = DAOFactory(package="WMCore.WMBS",
                                logger=myThread.logger,
                                dbinterface=myThread.dbi)

        locationAction = daofactory(classname="Locations.New")
        locationAction.execute(siteName="site1", seName="somese.cern.ch")

        self.multipleFileFileset = Fileset(name="TestFileset1")
        self.multipleFileFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(i, *[45 + i]))
            newFile.create()
            self.multipleFileFileset.addFile(newFile)
        self.multipleFileFileset.commit()

        self.singleFileFileset = Fileset(name="TestFileset2")
        self.singleFileFileset.create()
        newFile = File("/some/file/name",
                       size=1000,
                       events=100,
                       locations=set(["somese.cern.ch"]))
        newFile.addRun(Run(1, *[45]))
        newFile.create()
        self.singleFileFileset.addFile(newFile)
        self.singleFileFileset.commit()

        self.multipleFileLumiset = Fileset(name="TestFileset3")
        self.multipleFileLumiset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45 + i / 3]))
            newFile.create()
            self.multipleFileLumiset.addFile(newFile)
        self.multipleFileLumiset.commit()

        self.singleLumiFileset = Fileset(name="TestFileset4")
        self.singleLumiFileset.create()
        for i in range(10):
            newFile = File(makeUUID(),
                           size=1000,
                           events=100,
                           locations=set(["somese.cern.ch"]))
            newFile.addRun(Run(1, *[45]))
            newFile.create()
            self.singleLumiFileset.addFile(newFile)
        self.singleLumiFileset.commit()

        testWorkflow = Workflow(spec="spec.xml",
                                owner="mnorman",
                                name="wf001",
                                task="Test")
        testWorkflow.create()
        self.multipleFileSubscription = Subscription(
            fileset=self.multipleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleFileSubscription = Subscription(
            fileset=self.singleFileFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.multipleLumiSubscription = Subscription(
            fileset=self.multipleFileLumiset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")
        self.singleLumiSubscription = Subscription(
            fileset=self.singleLumiFileset,
            workflow=testWorkflow,
            split_algo="FixedDelay",
            type="Processing")

        self.multipleFileSubscription.create()
        self.singleFileSubscription.create()
        self.multipleLumiSubscription.create()
        self.singleLumiSubscription.create()
        return

Пример #7

Показать файл

Файл: EventAwareLumiByWork_t.py Проект: todor-ivanov/WMCore

    def testLumiMask(self):
        """
        _testLumiMask_

        Test that we can use a lumi-mask to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Use a lumi-mask = {1: [[10,14]], 2: [[20,21]], 4: [[40,41]]}
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=850,
                               runs=['1', '2', '4'],
                               lumis=['10,14', '20,21', '40,41'],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1, "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2, "Two jobs must be in the jobgroup")
        processedLumis = LumiList()
        for job in jobs:
            processedLumis += LumiList(compactList=job['mask'].getRunAndLumis())
        correctLumis = LumiList(compactList={1: [[10, 14]], 2: [[20, 21]], 4: [[40, 41]]})
        self.assertEqual(processedLumis.getCMSSWString(), correctLumis.getCMSSWString())

Пример #8

Показать файл

    def createTestJobGroup(self,
                           nJobs=10,
                           retry_count=1,
                           workloadPath='test',
                           fwjrPath=None,
                           workloadName=makeUUID()):
        """
        Creates a group of several jobs
        """

        myThread = threading.currentThread()
        myThread.transaction.begin()
        testWorkflow = Workflow(spec=workloadPath,
                                owner="cmsdataops",
                                group="cmsdataops",
                                name=workloadName,
                                task="/TestWorkload/ReReco")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFile0 = File(lfn="/this/is/a/parent", size=1024, events=10)
        testFile0.addRun(Run(10, *[12312]))
        testFile0.setLocation('malpaquet')

        testFileA = File(lfn="/this/is/a/lfnA",
                         size=1024,
                         events=10,
                         first_event=88)
        testFileA.addRun(Run(10, *[12312, 12313]))
        testFileA.setLocation('malpaquet')

        testFileB = File(lfn="/this/is/a/lfnB",
                         size=1024,
                         events=10,
                         first_event=88)
        testFileB.addRun(Run(10, *[12314, 12315, 12316]))
        testFileB.setLocation('malpaquet')

        testFile0.create()
        testFileA.create()
        testFileB.create()

        testFileA.addParent(lfn="/this/is/a/parent")
        testFileB.addParent(lfn="/this/is/a/parent")

        for i in range(0, nJobs):
            testJob = Job(name=makeUUID())
            testJob['retry_count'] = retry_count
            testJob['retry_max'] = 10
            testJob['mask'].addRunAndLumis(run=10, lumis=[12312])
            testJob['mask'].addRunAndLumis(run=10, lumis=[12314, 12316])
            testJob['mask']['FirstEvent'] = 100
            testJob['cache_dir'] = os.path.join(self.testDir, testJob['name'])
            testJob['fwjr_path'] = fwjrPath
            os.mkdir(testJob['cache_dir'])
            testJobGroup.add(testJob)
            testJob.create(group=testJobGroup)
            testJob.addFile(testFileA)
            testJob.addFile(testFileB)
            testJob.save()

        testJobGroup.commit()

        testSubscription.acquireFiles(files=[testFileA, testFileB])
        testSubscription.save()
        myThread.transaction.commit()

        return testJobGroup

Пример #9

Показать файл

Файл: ResubmitBlock_t.py Проект: tsarangi/WMCore

    def stuffACDCDatabase(self,
                          numFiles=50,
                          lumisPerFile=20,
                          lumisPerACDCRecord=2):
        """
        _stuffACDCDatabase_

        Fill the ACDC database with ACDC records, both for processing
        and merge
        """
        filesetName = '/%s/DataProcessing' % self.workflowName
        owner = 'unknown'
        group = 'unknown'

        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/data/a/%d' % i
                acdcFile = File(lfn=lfn,
                                size=100,
                                events=250,
                                locations=self.validLocations,
                                merged=1)
                run = Run(
                    i + 1,
                    *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {
                    'collection_name': self.workflowName,
                    'collection_type': 'ACDC.CollectionTypes.DataCollection',
                    'files': {
                        lfn: acdcFile
                    },
                    'fileset_name': filesetName,
                    'owner': {
                        'user': owner,
                        'group': group
                    }
                }
                self.acdcDB.queue(acdcDoc)
        filesetName = '/%s/DataProcessing/DataProcessingMergeRECOoutput' % self.workflowName

        for i in range(numFiles):
            for j in range(1, lumisPerFile + 1, lumisPerACDCRecord):
                lfn = '/store/unmerged/b/%d' % i
                acdcFile = File(lfn=lfn,
                                size=100,
                                events=250,
                                locations=set([choice(self.validLocations)]),
                                merged=0)
                run = Run(
                    i + 1,
                    *range(j, min(j + lumisPerACDCRecord, lumisPerFile + 1)))
                acdcFile.addRun(run)
                acdcDoc = {
                    'collection_name': self.workflowName,
                    'collection_type': 'ACDC.CollectionTypes.DataCollection',
                    'files': {
                        lfn: acdcFile
                    },
                    'fileset_name': filesetName,
                    'owner': {
                        'user': owner,
                        'group': group
                    }
                }
                self.acdcDB.queue(acdcDoc)
        self.acdcDB.commit()

        return

Пример #10

Показать файл

Файл: PhEDExInjectorPoller_t.py Проект: scarletnorberg/WMCore

    def stuffDatabase(self):
        """
        _stuffDatabase_

        Fill the dbsbuffer with some files and blocks.  We'll insert a total
        of 5 files spanning two blocks.  There will be a total of two datasets
        inserted into the datbase.

        We'll inject files with the location set as an SE name as well as a
        PhEDEx node name as well.
        """
        myThread = threading.currentThread()

        buffer3Factory = DAOFactory(package="WMComponent.DBS3Buffer",
                                    logger=myThread.logger,
                                    dbinterface=myThread.dbi)
        insertWorkflow = buffer3Factory(classname="InsertWorkflow")
        insertWorkflow.execute("BogusRequest", "BogusTask", 0, 0, 0, 0)

        checksums = {"adler32": "1234", "cksum": "5678"}
        testFileA = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileA.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath(self.testDatasetA)
        testFileA.addRun(Run(2, *[45]))
        testFileA.create()

        testFileB = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileB.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileB.setDatasetPath(self.testDatasetA)
        testFileB.addRun(Run(2, *[45]))
        testFileB.create()

        testFileC = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileC.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileC.setDatasetPath(self.testDatasetA)
        testFileC.addRun(Run(2, *[45]))
        testFileC.create()

        self.testFilesA.append(testFileA)
        self.testFilesA.append(testFileB)
        self.testFilesA.append(testFileC)

        testFileD = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileD.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileD.setDatasetPath(self.testDatasetB)
        testFileD.addRun(Run(2, *[45]))
        testFileD.create()

        testFileE = DBSBufferFile(lfn=makeUUID(),
                                  size=1024,
                                  events=10,
                                  checksums=checksums,
                                  locations=set(["srm-cms.cern.ch"]))
        testFileE.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileE.setDatasetPath(self.testDatasetB)
        testFileE.addRun(Run(2, *[45]))
        testFileE.create()

        self.testFilesB.append(testFileD)
        self.testFilesB.append(testFileE)

        uploadFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)
        datasetAction = uploadFactory(classname="NewDataset")
        createAction = uploadFactory(classname="CreateBlocks")

        datasetAction.execute(datasetPath=self.testDatasetA)
        datasetAction.execute(datasetPath=self.testDatasetB)

        self.blockAName = self.testDatasetA + "#" + makeUUID()
        self.blockBName = self.testDatasetB + "#" + makeUUID()

        newBlockA = DBSBufferBlock(name=self.blockAName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockA.setDataset(self.testDatasetA, 'data', 'VALID')
        newBlockA.status = 'Closed'

        newBlockB = DBSBufferBlock(name=self.blockBName,
                                   location="srm-cms.cern.ch",
                                   datasetpath=None)
        newBlockB.setDataset(self.testDatasetB, 'data', 'VALID')
        newBlockB.status = 'Closed'

        createAction.execute(blocks=[newBlockA, newBlockB])

        bufferFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                   logger=myThread.logger,
                                   dbinterface=myThread.dbi)

        setBlock = bufferFactory(classname="DBSBufferFiles.SetBlock")
        setBlock.execute(testFileA["lfn"], self.blockAName)
        setBlock.execute(testFileB["lfn"], self.blockAName)
        setBlock.execute(testFileC["lfn"], self.blockAName)
        setBlock.execute(testFileD["lfn"], self.blockBName)
        setBlock.execute(testFileE["lfn"], self.blockBName)

        fileStatus = bufferFactory(classname="DBSBufferFiles.SetStatus")
        fileStatus.execute(testFileA["lfn"], "LOCAL")
        fileStatus.execute(testFileB["lfn"], "LOCAL")
        fileStatus.execute(testFileC["lfn"], "LOCAL")
        fileStatus.execute(testFileD["lfn"], "LOCAL")
        fileStatus.execute(testFileE["lfn"], "LOCAL")

        associateWorkflow = buffer3Factory(
            classname="DBSBufferFiles.AssociateWorkflowToFile")
        associateWorkflow.execute(testFileA["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileB["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileC["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileD["lfn"], "BogusRequest",
                                  "BogusTask")
        associateWorkflow.execute(testFileE["lfn"], "BogusRequest",
                                  "BogusTask")

        return

Пример #11

Показать файл

class DataDiscovery(TaskAction):
    """
    I am the abstract class for the data discovery.
    Taking care of generalizing different data discovery
    possibilities. Implementing only a common method to
    return a properly formatted output.
    """
    def formatOutput(self, task, requestname, datasetfiles, locations):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")
        # TEMPORARY
        pnn_psn_map = {}
        sbj = SiteDBJSON({
            "key": self.config.TaskWorker.cmskey,
            "cert": self.config.TaskWorker.cmscert
        })

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        file_counter = 0
        uniquelumis = set()
        ## Loop over the sorted list of files.
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Createa a WMCore File object.
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=infos['Size'],
                          checksums=infos['Checksums'],
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            wmfile['locations'] = []
            for pnn in locations[infos['BlockName']]:
                if pnn and pnn not in pnn_psn_map:
                    self.logger.debug("Translating PNN %s" % pnn)
                    try:
                        pnn_psn_map[pnn] = sbj.PNNtoPSN(pnn)
                    except KeyError, ke:
                        self.logger.error(
                            "Impossible translating %s to a CMS name through SiteDB"
                            % pnn)
                        pnn_psn_map[pnn] = ''
                    except httplib.HTTPException, ex:
                        self.logger.error("Couldn't map SE to site: %s" % pnn)
                        print "Couldn't map SE to site: %s" % pnn
                        print "got problem: %s" % ex
                        print "got another problem: %s" % ex.__dict__
                if pnn and pnn in pnn_psn_map:
                    if type(pnn_psn_map[pnn]) == list:
                        wmfile['locations'].extend(pnn_psn_map[pnn])
                    else:
                        wmfile['locations'].append(pnn_psn_map[pnn])
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)
            file_counter += 1

Пример #12

Показать файл

    def test10(self):
        """
        _test10_

        Test merging of multiple lumis with holes in the lumi sequence

        Hole is due to no streamer files for the lumi

        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 5]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 4,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 1,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        self.feedStreamersDAO.execute(transaction = False)
        self.fileset1.loadData()

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.acquireFilesDAO.execute(self.subscription1['id'], fileid,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        for fileid in self.fileset1.getFiles(type = 'id'):
            self.completeFilesDAO.execute(self.subscription1['id'], fileid,
                                          transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

Пример #13

Показать файл

Файл: DataCollectionService_t.py Проект: todor-ivanov/WMCore

    def testChunking(self):
        """
        _testChunking_

        Insert a workload and files that have several distinct sets of
        locations.  Verify that the chunks are created correctly and that they
        only groups files that have the same set of locations.  Also verify that
        the chunks are pulled out of ACDC correctly.
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl, database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileB.addRun(Run(1, 3, 4))
        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileC.addRun(Run(1, 5, 6))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)
        testJobA.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.setLocation(["cmssrm.fnal.gov"])
        testFileD.addRun(Run(2, 1, 2))
        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.setLocation(["cmssrm.fnal.gov"])
        testFileE.addRun(Run(2, 3, 4))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileD)
        testJobB.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/F"})
        testFileF.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileF.addRun(Run(3, 1, 2))
        testFileG = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/G"})
        testFileG.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileG.addRun(Run(3, 3, 4))
        testFileH = File(lfn=makeUUID(), size=1024, events=1024, parents={"/some/parent/H"})
        testFileH.setLocation(["cmssrm.fnal.gov", "castor.cern.ch", "srm.ral.uk"])
        testFileH.addRun(Run(3, 5, 6))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileF)
        testJobC.addFile(testFileG)
        testJobC.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileI.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileI.addRun(Run(4, 1, 2))
        testFileJ = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileJ.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileJ.addRun(Run(4, 3, 4))
        testFileK = File(lfn=makeUUID(), size=1024, events=1024, merged=True)
        testFileK.setLocation(["cmssrm.fnal.gov", "castor.cern.ch"])
        testFileK.addRun(Run(4, 5, 6))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileI)
        testJobD.addFile(testFileJ)
        testJobD.addFile(testFileK)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD])
        chunks = dcs.chunkFileset("ACDCTest", "/ACDCTest/reco", chunkSize=5)

        self.assertEqual(len(chunks), 4, "Error: There should be four chunks: %s" % len(chunks))

        goldenMetaData = {1: {"lumis": 2, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 1024},
                          2: {"lumis": 4, "locations": ["cmssrm.fnal.gov"], "events": 2048},
                          3: {"lumis": 6, "locations": ["castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"],
                              "events": 3072},
                          5: {"lumis": 10, "locations": ["castor.cern.ch", "cmssrm.fnal.gov"], "events": 5120}}

        testFiles = [testFileA, testFileB, testFileC, testFileI, testFileJ, testFileK]
        lastFile = testFileA
        for testFile in testFiles:
            if lastFile["lfn"] < testFile["lfn"]:
                lastFile = testFile

        testFiles.remove(lastFile)

        goldenFiles = {1: [lastFile],
                       2: [testFileD, testFileE],
                       3: [testFileF, testFileG, testFileH],
                       5: testFiles}

        for chunk in chunks:
            chunkMetaData = dcs.getChunkInfo("ACDCTest", "/ACDCTest/reco",
                                             chunk["offset"], chunk["files"])

            self.assertEqual(chunkMetaData["files"], chunk["files"])
            self.assertEqual(chunkMetaData["lumis"], chunk["lumis"])
            self.assertEqual(chunkMetaData["events"], chunk["events"])
            self.assertEqual(chunkMetaData["locations"], chunk["locations"])

            self.assertTrue(chunk["files"] in goldenMetaData, "Error: Extra chunk found.")
            self.assertEqual(chunk["lumis"], goldenMetaData[chunk["files"]]["lumis"],
                             "Error: Lumis in chunk is wrong.")
            self.assertEqual(chunk["locations"], goldenMetaData[chunk["files"]]["locations"],
                             "Error: Locations in chunk is wrong.")
            self.assertEqual(chunk["events"], goldenMetaData[chunk["files"]]["events"],
                             "Error: Events in chunk is wrong.")
            del goldenMetaData[chunk["files"]]

            chunkFiles = dcs.getChunkFiles("ACDCTest", "/ACDCTest/reco", chunk["offset"], chunk["files"])

            self.assertTrue(chunk["files"] in goldenFiles, "Error: Extra chunk found.")
            goldenChunkFiles = goldenFiles[chunk["files"]]
            self.assertEqual(len(chunkFiles), len(goldenChunkFiles))

            for chunkFile in chunkFiles:
                foundFile = None
                for goldenChunkFile in goldenChunkFiles:
                    if chunkFile["lfn"] == goldenChunkFile["lfn"]:
                        foundFile = goldenChunkFile
                        break

                self.assertIsNotNone(foundFile, "Error: Missing chunk file: %s, %s" % (chunkFiles, goldenChunkFiles))
                self.assertEqual(set(foundFile["parents"]), chunkFile["parents"], "Error: File parents should match.")
                self.assertEqual(foundFile["merged"], chunkFile["merged"], "Error: File merged status should match.")
                self.assertEqual(foundFile["locations"], chunkFile["locations"], "Error: File locations should match.")
                self.assertEqual(foundFile["events"], chunkFile["events"])
                self.assertEqual(foundFile["size"], chunkFile["size"])
                self.assertEqual(len(foundFile["runs"]), len(chunkFile["runs"]), "Error: Wrong number of runs.")
                for run in foundFile["runs"]:
                    runMatch = False
                    for chunkRun in chunkFile["runs"]:
                        if chunkRun.run == run.run and chunkRun.lumis == run.lumis:
                            runMatch = True
                            break

                    self.assertTrue(runMatch, "Error: Run information is wrong.")

            del goldenFiles[chunk["files"]]

        singleChunk = dcs.singleChunkFileset("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(singleChunk, {"offset": 0,
                                       "files": 11,
                                       "events": 11264,
                                       "lumis": 22,
                                       "locations": {"castor.cern.ch", "cmssrm.fnal.gov", "srm.ral.uk"}},
                         "Error: Single chunk metadata is wrong")

        return

Пример #14

Показать файл

Файл: DataCollectionService_t.py Проект: todor-ivanov/WMCore

    def testGetLumiWhitelist(self):
        """
        _testGetLumiWhitelist_

        Verify that the ACDC whitelist generation code works correctly.  We'll
        add jobs with the following lumi info:
          # Run 1, lumis [1, 2, 3], [4, 6], [7], [9], [11, 12]
          # Run 2, lumis [5, 6, 7], [10, 11, 12], [15]
          # Run 3, lumis [20]

        And should get out a whitelist that looks like this:
          {"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
           "2": [[5, 7], [10, 12], [15, 15]],
           "3": [[20, 20]]}
        """
        dcs = DataCollectionService(url=self.testInit.couchUrl, database="wmcore-acdc-datacollectionsvc")

        testFileA = File(lfn=makeUUID(), size=1024, events=1024)
        testFileA.addRun(Run(1, 1, 2))
        testFileB = File(lfn=makeUUID(), size=1024, events=1024)
        testFileB.addRun(Run(1, 3))
        testJobA = self.getMinimalJob()
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(), size=1024, events=1024)
        testFileC.addRun(Run(1, 4, 6))
        testJobB = self.getMinimalJob()
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(), size=1024, events=1024)
        testFileD.addRun(Run(1, 7))
        testJobC = self.getMinimalJob()
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(), size=1024, events=1024)
        testFileE.addRun(Run(1, 11, 12))
        testJobD = self.getMinimalJob()
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(), size=1024, events=1024)
        testFileF.addRun(Run(2, 5, 6, 7))
        testJobE = self.getMinimalJob()
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(), size=1024, events=1024)
        testFileG.addRun(Run(2, 10, 11, 12))
        testJobF = self.getMinimalJob()
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(), size=1024, events=1024)
        testFileH.addRun(Run(2, 15))
        testJobG = self.getMinimalJob()
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(), size=1024, events=1024)
        testFileI.addRun(Run(3, 20))
        testJobH = self.getMinimalJob()
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(), size=1024, events=1024)
        testFileJ.addRun(Run(1, 9))
        testJobI = self.getMinimalJob()
        testJobI.addFile(testFileJ)

        dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
                        testJobF, testJobG, testJobH, testJobI])
        whiteList = dcs.getLumiWhitelist("ACDCTest", "/ACDCTest/reco")

        self.assertEqual(len(whiteList), 3,
                         "Error: There should be 3 runs.")
        self.assertEqual(whiteList["1"], [[1, 4], [6, 7], [9, 9], [11, 12]],
                         "Error: Whitelist for run 1 is wrong.")
        self.assertEqual(whiteList["2"], [[5, 7], [10, 12], [15, 15]],
                         "Error: Whitelist for run 2 is wrong.")
        self.assertEqual(whiteList["3"], [[20, 20]],
                         "Error: Whitelist for run 3 is wrong.")

        correctLumiList = LumiList(compactList={"1": [[1, 4], [6, 7], [9, 9], [11, 12]],
                                                "2": [[5, 7], [10, 12], [15, 15]],
                                                "3": [[20, 20]]})
        testLumiList = dcs.getLumilistWhitelist("ACDCTest", "/ACDCTest/reco")
        self.assertEqual(correctLumiList.getCMSSWString(), testLumiList.getCMSSWString())

        return

Пример #15

Показать файл

    def testC_ACDCTest(self):
        """
        _ACDCTest_

        Test whether we can get a goodRunList out of ACDC
        and process it correctly.
        """
        workload = self.createTestWorkload()
        dcs = DataCollectionService(url=self.testInit.couchUrl,
                                    database=self.testInit.couchDbName)

        testFileA = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileA.addRun(Run(1, 1, 2))
        testFileA.create()
        testFileB = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileB.addRun(Run(1, 3))
        testFileB.create()
        testJobA = getJob(workload)
        testJobA.addFile(testFileA)
        testJobA.addFile(testFileB)

        testFileC = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileC.addRun(Run(1, 4, 6))
        testFileC.create()
        testJobB = getJob(workload)
        testJobB.addFile(testFileC)

        testFileD = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileD.addRun(Run(1, 7))
        testFileD.create()
        testJobC = getJob(workload)
        testJobC.addFile(testFileD)

        testFileE = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileE.addRun(Run(1, 11, 12))
        testFileE.create()
        testJobD = getJob(workload)
        testJobD.addFile(testFileE)

        testFileF = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileF.addRun(Run(2, 5, 6, 7))
        testFileF.create()
        testJobE = getJob(workload)
        testJobE.addFile(testFileF)

        testFileG = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileG.addRun(Run(2, 10, 11, 12))
        testFileG.create()
        testJobF = getJob(workload)
        testJobF.addFile(testFileG)

        testFileH = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileH.addRun(Run(2, 15))
        testFileH.create()
        testJobG = getJob(workload)
        testJobG.addFile(testFileH)

        testFileI = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileI.addRun(Run(3, 20))
        testFileI.create()
        testJobH = getJob(workload)
        testJobH.addFile(testFileI)

        testFileJ = File(lfn=makeUUID(),
                         size=1024,
                         events=1024,
                         locations="T1_US_FNAL_Disk")
        testFileJ.addRun(Run(1, 9))
        testFileJ.create()
        testJobI = getJob(workload)
        testJobI.addFile(testFileJ)

        # dcs.failedJobs([testJobA, testJobB, testJobC, testJobD, testJobE,
        #                testJobF, testJobG, testJobH, testJobI])

        dcs.failedJobs([testJobA, testJobD, testJobH])

        baseName = makeUUID()

        testFileset = Fileset(name=baseName)
        testFileset.create()
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.addFile(testFileH)
        testFileset.addFile(testFileI)
        testFileset.addFile(testFileJ)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        splitter = SplitterFactory()
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        jobGroups = jobFactory(
            lumis_per_job=100,
            halt_job_on_file_boundaries=False,
            splitOnRun=True,
            collectionName=workload.name(),
            filesetName=workload.getTask("reco").getPathName(),
            owner="evansde77",
            group="DMWM",
            couchURL=self.testInit.couchUrl,
            couchDB=self.testInit.couchDbName,
            performance=self.performanceParams)

        self.assertEqual(jobGroups[0].jobs[0]['mask'].getRunAndLumis(),
                         {1: [[1, 2], [3, 3], [11, 12]]})
        self.assertEqual(jobGroups[0].jobs[1]['mask'].getRunAndLumis(),
                         {3: [[20, 20]]})

        return

Пример #16

Показать файл

Файл: Condition_t.py Проект: xmniu/T0

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnn
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : 1,
                                            'STREAM' : "Express",
                                            'TIME' : int(time.time()),
                                            'LFN' : "/streamer",
                                            'FILESIZE' : 0,
                                            'EVENTS' : 0 },
                                  transaction = False)

        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertPromptCalibrationDAO.execute( { 'RUN' : 1,
                                              'STREAM' : "Express" },
                                            transaction = False)

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Condition",
                                           type = "Condition")
        self.subscription1.create()

        # set parentage chain and sqlite fileset
        alcaRecoFile = File("/alcareco", size = 0, events = 0)
        alcaRecoFile.addRun(Run(1, *[1]))
        alcaRecoFile.setLocation("SomePNN", immediateSave = False)
        alcaRecoFile.create()
        alcaPromptFile = File("/alcaprompt", size = 0, events = 0)
        alcaPromptFile.addRun(Run(1, *[1]))
        alcaPromptFile.setLocation("SomePNN", immediateSave = False)
        alcaPromptFile.create()
        sqliteFile = File("/sqlite", size = 0, events = 0)
        sqliteFile.create()
        self.fileset1.addFile(sqliteFile)
        self.fileset1.commit()

        results = myThread.dbi.processData("""SELECT lfn FROM wmbs_file_details
                                              """,
                                           transaction = False)[0].fetchall()

        setParentageDAO = wmbsDaoFactory(classname = "Files.SetParentage")
        setParentageDAO.execute(binds = [ { 'parent' : "/streamer",
                                            'child' : "/alcareco" },
                                          { 'parent' : "/alcareco",
                                            'child' : "/alcaprompt" },
                                          { 'parent' : "/alcaprompt",
                                            'child' : "/sqlite" } ],
                                transaction = False)

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['runNumber'] = 1
        self.splitArgs['streamName'] = "Express"

        return

Пример #17

Показать файл

    def test_NotEnoughEvents(self):
        """
        _test_NotEnoughEvents_

        Checks whether jobs are not created when there are not enough files (actually, events)
        according to the events_per_job requested to the splitter algorithm
        """
        splitter = SplitterFactory()

        # Very small fileset (single file) without enough events
        testSubscription = self.createSubscription(nFiles=1, lumisPerFile=2)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=5,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 0)

        # Still a small fileset (two files) without enough events
        testSubscription = self.createSubscription(nFiles=2, lumisPerFile=2)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=5,
                               performance=self.performanceParams,
                               splitOnRun=False)

        self.assertEqual(len(jobGroups), 0)

        # Finally an acceptable fileset size (three files) with enough events
        testSubscription = self.createSubscription(nFiles=3, lumisPerFile=2)

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=5,
                               performance=self.performanceParams,
                               splitOnRun=False)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        self.assertEqual(len(jobs[0]['input_files']), 3)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(), {
            0: [[0, 1]],
            1: [[100, 101]],
            2: [[200, 200]]
        })
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {2: [[201, 201]]})

        # Test fileset with a single run and splitOnRun=True
        testFileset = Fileset(name="FilesetA")
        for i in range(3):
            testFile = File(lfn="/this/is/file%s" % i,
                            size=1024,
                            events=200,
                            locations="T1_US_FNAL_Disk")
            lumis = [i * 2 + val for val in range(1, 3)]
            testFile.addRun(Run(1, lumis))
            testFile.create()
            testFileset.addFile(testFile)
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="LumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        jobGroups = jobFactory(lumis_per_job=5,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        self.assertEqual(len(jobs[0]['input_files']), 3)
        self.assertEqual(len(jobs[1]['input_files']), 1)
        self.assertEqual(jobs[0]['mask'].getRunAndLumis(),
                         {1: [[1, 2], [3, 4], [5, 5]]})
        self.assertEqual(jobs[1]['mask'].getRunAndLumis(), {1: [[6, 6]]})

        return

Пример #18

Показать файл

    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        ## Loop over the sorted list of files.
        # can't affort one message from CRIC per file, unless critical !
        previousLogLevel = self.logger.getEffectiveLevel()
        resourceCatalog = CRIC(logger=self.logger)
        self.logger.setLevel(logging.ERROR)
        for lfn, infos in datasetfiles.iteritems():
            ## Skip the file if the block has not been found or has no locations.
            if not infos['BlockName'] in locations or not locations[
                    infos['BlockName']]:
                self.logger.warning(
                    "Skipping %s because its block (%s) has no locations" %
                    (lfn, infos['BlockName']))
                continue
            ## Skip the file if it is not in VALID state.
            if not infos.get('ValidFile', True):
                self.logger.warning("Skipping invalid file %s" % lfn)
                continue

            if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                raise TaskWorkerException(
                    "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                    +
                    "because you specified useParents=True but some your files have no"
                    + "parents.\nExample: " + lfn)
            ## Create a WMCore File object.
            try:
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
            except:
                #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                size = infos['Size']
                checksums = infos['Checksums']
            wmfile = File(lfn=lfn,
                          events=infos['NumberOfEvents'],
                          size=size,
                          checksums=checksums,
                          parents=infos['Parents'])
            wmfile['block'] = infos['BlockName']
            try:
                wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                    locations[wmfile['block']])
            except Exception as ex:
                self.logger.error(
                    "Impossible translating %s to a CMS name through CMS Resource Catalog"
                    % locations[wmfile['block']])
                self.logger.error("got this exception:\n %s" % ex)
                raise
            wmfile['workflow'] = requestname
            event_counter += infos['NumberOfEvents']
            for run, lumis in infos['Lumis'].iteritems():
                datasetLumis.setdefault(run, []).extend(lumis)
                wmfile.addRun(Run(run, *lumis))
                for lumi in lumis:
                    uniquelumis.add((run, lumi))
                lumi_counter += len(lumis)
            wmfiles.append(wmfile)

        self.logger.setLevel(previousLogLevel)
        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d' % event_counter)
        self.logger.debug('Tot lumis found: %d' % uniquelumis)
        self.logger.debug('Duplicate lumis found: %d' %
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d' % len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))

Пример #19

Показать файл

Файл: EventAwareLumiByWork_t.py Проект: todor-ivanov/WMCore

    def testNoFileSplitNoHardLimit(self):
        """
        _testNoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        # Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100, lumisPerFile=7, twoSites=False, nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)

        # First test, the optimal settings are 360 events per job. As we have files with 0 events per lumi, this will
        # configure the splitting to a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=360,
                               performance=self.performanceParams)

        # One job in one job group with 100 files
        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1)
        self.assertEqual(len(jobs[0]['input_files']), 100)

        # Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileA = self.createFile("/this/is/file1", 250, 0, 5, "blenheim")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1, "blenheim")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2, "blenheim")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1, "blenheim")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1, "blenheim")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1, "blenheim")
        testFileG = self.createFile("/this/is/file7", 153, 6, 3, "blenheim")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)

        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # Split the work targeting 150 events per job
        jobGroups = jobFactory(halt_job_on_file_boundaries=False, splitOnRun=False, events_per_job=150,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 7)

        # Test interactions of this algorithm with splitOnRun = True
        # Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("malpaquet")

        fileB = self.createFile('/this/is/file2', 200, 3, 5, "malpaquet")

        testFileset = Fileset(name='FilesetB')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testSubscription = Subscription(fileset=testFileset, workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork", type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs", subscription=testSubscription)
        # The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True, halt_job_on_file_boundaries=False, events_per_job=700,
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6)
        # Make sure each job has one run
        for job in jobs:
            self.assertEqual(len(job['mask'].getRunAndLumis()), 1)

Пример #20

Показать файл

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test event threshold (single job creation)

        Test that only closed lumis are used

        Test check on express release

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "Express",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': 0
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.finalCloseLumis()

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Express-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

Пример #21

Показать файл

Файл: EventAwareLumiByWork_t.py Проект: todor-ivanov/WMCore

    def testRunWhiteList(self):
        """
        _testRunWhiteList_

        Test that we can use a run white list to filter good runs/lumis.
        """
        splitter = SplitterFactory()

        # Create 3 files with 100 events per lumi:
        # - file1 with 1 run  of 8 lumis
        # - file2 with 2 runs of 2 lumis each
        # - file3 with 1 run  of 5 lumis
        fileA = File(lfn="/this/is/file1", size=1000, events=800)
        fileB = File(lfn="/this/is/file2", size=1000, events=400)
        fileC = File(lfn="/this/is/file3", size=1000, events=500)

        lumiListA = []
        for lumi in range(8):
            lumiListA.append(10 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.setLocation("somese.cern.ch")
        lumiListB1 = []
        lumiListB2 = []
        for lumi in range(2):
            lumiListB1.append(20 + lumi)
            lumiListB2.append(30 + lumi)
        fileB.addRun(Run(2, *lumiListB1))
        fileB.addRun(Run(3, *lumiListB2))
        fileB.setLocation("somese.cern.ch")
        lumiListC = []
        for lumi in range(5):
            lumiListC.append(40 + lumi)
        fileC.addRun(Run(4, *lumiListC))
        fileC.setLocation("somese.cern.ch")

        testFileset = Fileset(name='Fileset')
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.addFile(fileC)

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiByWork",
                                        type="Processing")
        jobFactory = splitter(package="WMCore.DataStructs",
                              subscription=testSubscription)

        # Split with no breaks
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=725,
                               runWhitelist=[1, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 2)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 4])

        # Re-split with a break on runs
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=True,
                               events_per_job=595,
                               runWhitelist=[1, 3, 4],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 4)
        self.enforceLimits(jobs=jobs, runsPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 3, 4])

        # Re-split with a break on files
        jobGroups = jobFactory(halt_job_on_file_boundaries=True,
                               splitOnRun=False,
                               events_per_job=595,
                               runWhitelist=[1, 2, 3],
                               performance=self.performanceParams)

        self.assertEqual(len(jobGroups), 1)
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 3)
        self.enforceLimits(jobs=jobs, filesPerJob=1)
        for job in jobs:
            for run in job['mask'].getRunAndLumis():
                self.assertIn(run, [1, 2, 3])

Пример #22

Показать файл

Файл: EventAwareLumiBased_t.py Проект: ticoann/WMCore

    def testB_NoFileSplitNoHardLimit(self):
        """
        _testB_NoFileSplitNoHardLimit_

        In this case we don't split on file boundaries, check different combination of files
        make sure we make the most of the splitting, e.g. include many zero event files in
        a single job.
        """
        splitter = SplitterFactory()

        #Create 100 files with 7 lumi per file and 0 events per lumi on average.
        testSubscription = self.createSubscription(nFiles=100,
                                                   lumisPerFile=7,
                                                   twoSites=False,
                                                   nEventsPerFile=0)
        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)

        #First test, the optimal settings are 360 events per job
        #As we have files with 0 events per lumi, this will configure the splitting to
        #a single job containing all files
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=360)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 1, "There should be 1 job")
        self.assertEqual(len(jobs[0]['input_files']), 100,
                         "All 100 files must be in the job")

        #Create 7 files, each one with different lumi/event distributions
        testFileset = Fileset(name="FilesetA")
        testFileset.create()
        testFileA = self.createFile("/this/is/file1", 250, 0, 5,
                                    "otherse.cern.ch")
        testFileB = self.createFile("/this/is/file2", 600, 1, 1,
                                    "otherse.cern.ch")
        testFileC = self.createFile("/this/is/file3", 1200, 2, 2,
                                    "otherse.cern.ch")
        testFileD = self.createFile("/this/is/file4", 100, 3, 1,
                                    "otherse.cern.ch")
        testFileE = self.createFile("/this/is/file5", 30, 4, 1,
                                    "otherse.cern.ch")
        testFileF = self.createFile("/this/is/file6", 10, 5, 1,
                                    "otherse.cern.ch")
        testFileG = self.createFile("/this/is/file7", 151, 6, 3,
                                    "otherse.cern.ch")
        testFileset.addFile(testFileA)
        testFileset.addFile(testFileB)
        testFileset.addFile(testFileC)
        testFileset.addFile(testFileD)
        testFileset.addFile(testFileE)
        testFileset.addFile(testFileF)
        testFileset.addFile(testFileG)
        testFileset.commit()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        #Optimal settings are: jobs with 150 events per job
        #This means, the first file must be splitted in 3 lumis per job which would leave room
        #for another lumi in the second job, but the second file has a lumi too big for that
        #The 3rd job only contains the second file, the fourth and fifth job split the third file
        jobGroups = jobFactory(halt_job_on_file_boundaries=False,
                               splitOnRun=False,
                               events_per_job=150)

        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 8, "Eight jobs must be in the jobgroup")
        self.assertEqual(jobs[0]["mask"].getRunAndLumis(), {0L: [[0L, 2L]]},
                         "Wrong mask for the first job")
        self.assertEqual(jobs[1]["mask"].getRunAndLumis(), {0L: [[3L, 4L]]},
                         "Wrong mask for the second job")
        self.assertEqual(jobs[2]["mask"].getRunAndLumis(), {1L: [[1L, 1L]]},
                         "Wrong mask for the third job")
        self.assertEqual(jobs[3]["mask"].getRunAndLumis(), {2L: [[4L, 4L]]},
                         "Wrong mask for the fourth job")
        self.assertEqual(jobs[4]["mask"].getRunAndLumis(), {2L: [[5L, 5L]]},
                         "Wrong mask for the fifth job")
        self.assertEqual(jobs[5]["mask"].getRunAndLumis(), {
            3L: [[3L, 3L]],
            4L: [[4L, 4L]],
            5L: [[5L, 5L]]
        }, "Wrong mask for the sixth job")
        self.assertEqual(jobs[6]["mask"].getRunAndLumis(), {6L: [[18L, 19L]]},
                         "Wrong mask for the seventh job")
        self.assertEqual(jobs[7]["mask"].getRunAndLumis(), {6L: [[20L, 20L]]},
                         "Wrong mask for the seventh job")
        #Test interactions of this algorithm with splitOnRun = True
        #Make 2 files, one with 3 runs and a second one with the last run of the first
        fileA = File(lfn="/this/is/file1a", size=1000, events=2400)
        lumiListA = []
        lumiListB = []
        lumiListC = []
        for lumi in range(8):
            lumiListA.append(1 + lumi)
            lumiListB.append(1 + lumi)
            lumiListC.append(1 + lumi)
        fileA.addRun(Run(1, *lumiListA))
        fileA.addRun(Run(2, *lumiListA))
        fileA.addRun(Run(3, *lumiListA))
        fileA.setLocation("somese.cern.ch")

        fileB = self.createFile('/this/is/file2a', 200, 3, 5, "somese.cern.ch")

        testFileset = Fileset(name='FilesetB')
        testFileset.create()
        testFileset.addFile(fileA)
        testFileset.addFile(fileB)
        testFileset.commit()
        testSubscription = Subscription(fileset=testFileset,
                                        workflow=self.testWorkflow,
                                        split_algo="EventAwareLumiBased",
                                        type="Processing")
        testSubscription.create()

        jobFactory = splitter(package="WMCore.WMBS",
                              subscription=testSubscription)
        #The settings for this splitting are 700 events per job
        jobGroups = jobFactory(splitOnRun=True,
                               halt_job_on_file_boundaries=False,
                               events_per_job=700)
        self.assertEqual(len(jobGroups), 1,
                         "There should be only one job group")
        jobs = jobGroups[0].jobs
        self.assertEqual(len(jobs), 6, "Six jobs must be in the jobgroup")

Пример #23

Показать файл

class WMBSHelper(WMConnectionBase):
    """
    _WMBSHelper_

    Interface between the WorkQueue and WMBS.
    """
    def __init__(self,
                 wmSpec,
                 taskName,
                 blockName=None,
                 mask=None,
                 cachepath='.'):
        """
        _init_

        Initialize DAOs and other things needed.
        """
        self.block = blockName
        self.mask = mask
        self.wmSpec = wmSpec
        self.topLevelTask = wmSpec.getTask(taskName)
        self.cachepath = cachepath
        self.isDBS = True

        self.topLevelFileset = None
        self.topLevelSubscription = None
        self.topLevelTaskDBSBufferId = None

        self.mergeOutputMapping = {}

        # Initiate the pieces you need to run your own DAOs
        WMConnectionBase.__init__(self, "WMCore.WMBS")
        myThread = threading.currentThread()
        self.dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                                        logger=myThread.logger,
                                        dbinterface=myThread.dbi)

        # DAOs from WMBS for file commit
        self.setParentage = self.daofactory(classname="Files.SetParentage")
        self.setFileRunLumi = self.daofactory(classname="Files.AddRunLumi")
        self.setFileLocation = self.daofactory(
            classname="Files.SetLocationForWorkQueue")
        self.setFileAddChecksum = self.daofactory(
            classname="Files.AddChecksumByLFN")
        self.addFileAction = self.daofactory(classname="Files.Add")
        self.addToFileset = self.daofactory(classname="Files.AddDupsToFileset")
        self.getLocations = self.daofactory(classname="Locations.ListSites")
        self.getLocationInfo = self.daofactory(
            classname="Locations.GetSiteInfo")

        # DAOs from DBSBuffer
        self.dbsCreateFiles = self.dbsDaoFactory(
            classname="DBSBufferFiles.Add")
        self.dbsSetLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.SetLocationByLFN")
        self.dbsInsertLocation = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddLocation")
        self.dbsSetChecksum = self.dbsDaoFactory(
            classname="DBSBufferFiles.AddChecksumByLFN")
        self.dbsInsertWorkflow = self.dbsDaoFactory(classname="InsertWorkflow")

        # Added for file creation bookkeeping
        self.dbsFilesToCreate = []
        self.addedLocations = []
        self.wmbsFilesToCreate = []
        self.insertedBogusDataset = -1

        return

    def createSandbox(self):
        """Create the runtime sandbox"""
        sandboxCreator = SandboxCreator()
        sandboxCreator.makeSandbox(self.cachepath, self.wmSpec)

    def createTopLevelFileset(self, topLevelFilesetName=None):
        """
        _createTopLevelFileset_

        Create the top level fileset for the workflow.  If the name of the top
        level fileset is not given create one.
        """
        if topLevelFilesetName == None:
            filesetName = (
                "%s-%s" %
                (self.wmSpec.name(), self.wmSpec.getTopLevelTask()[0].name()))
            if self.block:
                filesetName += "-%s" % self.block
            if self.mask:
                from hashlib import md5
                mask_string = ",".join(
                    ["%s=%s" % (x, self.mask[x]) for x in sorted(self.mask)])
                filesetName += "-%s" % md5(mask_string).hexdigest()
        else:
            filesetName = topLevelFilesetName

        self.topLevelFileset = Fileset(filesetName)
        self.topLevelFileset.create()
        return

    def outputFilesetName(self, task, outputModuleName):
        """
        _outputFilesetName_

        Generate an output fileset name for the given task and output module.
        """
        if task.taskType() == "Merge":
            outputFilesetName = "%s/merged-%s" % (task.getPathName(),
                                                  outputModuleName)
        else:
            outputFilesetName = "%s/unmerged-%s" % (task.getPathName(),
                                                    outputModuleName)

        return outputFilesetName

    def createSubscription(self, task, fileset, alternativeFilesetClose=False):
        """
        _createSubscription_

        Create subscriptions in the database.
        This includes workflows in WMBS and DBSBuffer, output maps, datasets
        and phedex subscriptions, and filesets for each task below and including
        the given task.
        """
        sub = self._createSubscriptionsInWMBS(task, fileset,
                                              alternativeFilesetClose)

        self._createWorkflowsInDBSBuffer()
        self._createDatasetSubscriptionsInDBSBuffer()

        return sub

    def _createSubscriptionsInWMBS(self,
                                   task,
                                   fileset,
                                   alternativeFilesetClose=False):
        """
        __createSubscriptionsInWMBS_

        Create subscriptions in WMBS for all the tasks in the spec.  This
        includes filesets, workflows and the output map for each task.
        """
        # create runtime sandbox for workflow
        self.createSandbox()

        #FIXME: Let workflow put in values if spec is missing them
        workflow = Workflow(
            spec=self.wmSpec.specUrl(),
            owner=self.wmSpec.getOwner()["name"],
            dn=self.wmSpec.getOwner().get("dn", "unknown"),
            group=self.wmSpec.getOwner().get("group", "unknown"),
            owner_vogroup=self.wmSpec.getOwner().get("vogroup", "DEFAULT"),
            owner_vorole=self.wmSpec.getOwner().get("vorole", "DEFAULT"),
            name=self.wmSpec.name(),
            task=task.getPathName(),
            wfType=self.wmSpec.getDashboardActivity(),
            alternativeFilesetClose=alternativeFilesetClose,
            priority=self.wmSpec.priority())
        workflow.create()
        subscription = Subscription(fileset=fileset,
                                    workflow=workflow,
                                    split_algo=task.jobSplittingAlgorithm(),
                                    type=task.getPrimarySubType())
        if subscription.exists():
            subscription.load()
            msg = "Subscription %s already exists for %s (you may ignore file insertion messages below, existing files wont be duplicated)"
            self.logger.info(msg % (subscription['id'], task.getPathName()))
        else:
            subscription.create()
        for site in task.siteWhitelist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": True
            }])

        for site in task.siteBlacklist():
            subscription.addWhiteBlackList([{
                "site_name": site,
                "valid": False
            }])

        if self.topLevelSubscription == None:
            self.topLevelSubscription = subscription
            logging.info("Top level subscription created: %s" %
                         subscription["id"])
        else:
            logging.info("Child subscription created: %s" % subscription["id"])

        outputModules = task.getOutputModulesForTask()
        for outputModule in outputModules:
            for outputModuleName in outputModule.listSections_():
                outputFileset = Fileset(
                    self.outputFilesetName(task, outputModuleName))
                outputFileset.create()
                outputFileset.markOpen(True)
                mergedOutputFileset = None

                for childTask in task.childTaskIterator():
                    if childTask.data.input.outputModule == outputModuleName:
                        if childTask.taskType() == "Merge":
                            mergedOutputFileset = Fileset(
                                self.outputFilesetName(childTask, "Merged"))
                            mergedOutputFileset.create()
                            mergedOutputFileset.markOpen(True)

                            primaryDataset = getattr(
                                getattr(outputModule, outputModuleName),
                                "primaryDataset", None)
                            if primaryDataset != None:
                                self.mergeOutputMapping[
                                    mergedOutputFileset.id] = primaryDataset

                        self._createSubscriptionsInWMBS(
                            childTask, outputFileset, alternativeFilesetClose)

                if mergedOutputFileset == None:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       outputFileset)
                else:
                    workflow.addOutput(outputModuleName, outputFileset,
                                       mergedOutputFileset)

        return self.topLevelSubscription

    def addMCFakeFile(self):
        """Add a fake file for wmbs to run production over"""
        needed = [
            'FirstEvent', 'FirstLumi', 'FirstRun', 'LastEvent', 'LastLumi',
            'LastRun'
        ]
        for key in needed:
            if self.mask and self.mask.get(key) is None:
                raise RuntimeError, 'Invalid value "%s" for %s' % (
                    self.mask.get(key), key)
        locations = set()
        for site in self.getLocations.execute(
                conn=self.getDBConn(), transaction=self.existingTransaction()):
            try:
                siteInfo = self.getLocationInfo.execute(
                    site,
                    conn=self.getDBConn(),
                    transaction=self.existingTransaction())
                if not siteInfo:
                    self.logger.info(
                        'Skipping MonteCarlo injection to site "%s" as unknown to wmbs'
                        % site)
                    continue
                locations.add(siteInfo[0]['se_name'])
            except StandardError, ex:
                self.logger.error(
                    'Error getting storage element for "%s": %s' %
                    (site, str(ex)))
        if not locations:
            raise RuntimeError, "No locations to inject Monte Carlo work to, unable to proceed"
        mcFakeFileName = "MCFakeFile-%s" % self.topLevelFileset.name
        wmbsFile = File(
            lfn=mcFakeFileName,
            first_event=self.mask['FirstEvent'],
            last_event=self.mask['LastEvent'],
            events=self.mask['LastEvent'] - self.mask['FirstEvent'] +
            1,  # inclusive range
            locations=locations,
            merged=False,  # merged causes dbs parentage relation
        )

        if self.mask:
            lumis = range(self.mask['FirstLumi'],
                          self.mask['LastLumi'] + 1)  # inclusive range
            wmbsFile.addRun(Run(self.mask['FirstRun'],
                                *lumis))  # assume run number static
        else:
            wmbsFile.addRun(Run(1, 1))

        wmbsFile['inFileset'] = True  # file is not a parent

        logging.info("WMBS File: %s on Location: %s" %
                     (wmbsFile['lfn'], wmbsFile['newlocations']))

        self.wmbsFilesToCreate.append(wmbsFile)

        totalFiles = self.topLevelFileset.addFilesToWMBSInBulk(
            self.wmbsFilesToCreate, self.wmSpec.name(), isDBS=self.isDBS)

        self.topLevelFileset.markOpen(False)
        return totalFiles

Пример #24

Показать файл

Файл: Job_t.py Проект: vytjan/WMCore

    def testMask(self):
        """
        _testMask_

        Test the new mask setup
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Steve",
                                name="wf001",
                                task="Test")

        testWorkflow.create()

        testFileset = Fileset(name="TestFileset")
        testFileset.create()

        testSubscription = Subscription(fileset=testFileset,
                                        workflow=testWorkflow)

        testSubscription.create()

        testFileA = File(lfn=makeUUID(), locations="test.site.ch")
        testFileB = File(lfn=makeUUID(), locations="test.site.ch")
        testFileA.create()
        testFileB.create()

        testFileset.addFile([testFileA, testFileB])
        testFileset.commit()

        testSubscription.acquireFiles([testFileA, testFileB])

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testJob = Job()
        testJob['mask'].addRunAndLumis(run=100, lumis=[101, 102])
        testJob['mask'].addRunAndLumis(run=200, lumis=[201, 202])
        testJob.create(group=testJobGroup)

        loadJob = Job(id=testJob.exists())
        loadJob.loadData()

        runs = loadJob['mask'].getRunAndLumis()
        self.assertEqual(len(runs), 2)
        self.assertEqual(runs[100], [[101, 102]])
        self.assertEqual(runs[200], [[201, 202]])

        bigRun = Run(100, *[101, 102, 103, 104])
        badRun = Run(300, *[1001, 1002])
        result = loadJob['mask'].filterRunLumisByMask([bigRun, badRun])

        self.assertEqual(len(result), 1)
        alteredRun = result.pop()
        self.assertEqual(alteredRun.run, 100)
        self.assertEqual(alteredRun.lumis, [101, 102])

        run0 = Run(300, *[1001, 1002])
        run1 = Run(300, *[1001, 1002])
        loadJob['mask'].filterRunLumisByMask([run0, run1])

        return

Пример #25

Показать файл

    def algorithm(self, *args, **kwargs):
        """
        _algorithm_

        An event base splitting algorithm.  All available files are split into a
        set number of events per job.
        """
        eventsPerJob = int(kwargs.get("events_per_job", 100))
        eventsPerLumi = int(kwargs.get("events_per_lumi", eventsPerJob))
        getParents = kwargs.get("include_parents", False)
        lheInput = kwargs.get("lheInputFiles", False)
        collectionName = kwargs.get('collectionName', None)
        timePerEvent, sizePerEvent, memoryRequirement = \
            self.getPerformanceParameters(kwargs.get('performance', {}))
        acdcFileList = []
        deterministicPileup = kwargs.get('deterministicPileup', False)

        if eventsPerJob <= 0 or eventsPerLumi <= 0:
            msg = "events_per_job and events_per_lumi must be positive. Their values are: "
            msg += "events_per_job: %d, events_per_lumi: %d" % (eventsPerJob, eventsPerLumi)
            raise RuntimeError(msg)

        if deterministicPileup and self.package == 'WMCore.WMBS':
            getJobNumber = self.daoFactory(classname="Jobs.GetNumberOfJobsPerWorkflow")
            self.nJobs = getJobNumber.execute(workflow=self.subscription.getWorkflow().id)
            logging.info('Creating jobs in DeterministicPileup mode for %s',
                         self.subscription.workflowName())

        # If we have runLumi info, we need to load it from couch
        if collectionName:
            try:
                from WMCore.ACDC.DataCollectionService import DataCollectionService
                couchURL = kwargs.get('couchURL')
                couchDB = kwargs.get('couchDB')
                filesetName = kwargs.get('filesetName')
                collectionName = kwargs.get('collectionName')
                logging.info('Creating jobs for ACDC fileset %s', filesetName)
                dcs = DataCollectionService(couchURL, couchDB)
                acdcFileList = dcs.getProductionACDCInfo(collectionName, filesetName)
            except Exception as ex:
                msg = "Exception while trying to load goodRunList\n"
                msg += "Refusing to create any jobs.\n"
                msg += str(ex)
                msg += str(traceback.format_exc())
                logging.error(msg)
                return

        totalJobs = 0

        locationDict = self.sortByLocation()
        for location in locationDict:
            self.newGroup()
            fileList = locationDict[location]
            getRunLumiInformation = False
            for f in fileList:
                if f['lfn'].startswith("MCFakeFile"):
                    # We have one MCFakeFile, then it needs run information
                    getRunLumiInformation = True
                    break
            if getRunLumiInformation:
                if self.package == 'WMCore.WMBS':
                    loadRunLumi = self.daoFactory(classname="Files.GetBulkRunLumi")
                    fileLumis = loadRunLumi.execute(files=fileList)
                    for f in fileList:
                        lumiDict = fileLumis.get(f['id'], {})
                        for run in lumiDict.keys():
                            f.addRun(run=Run(run, *lumiDict[run]))

            for f in fileList:
                currentEvent = f['first_event']
                eventsInFile = f['events']
                runs = list(f['runs'])
                # We got the runs, clean the file.
                f['runs'] = set()

                if getParents:
                    parentLFNs = self.findParent(lfn=f['lfn'])
                    for lfn in parentLFNs:
                        parent = File(lfn=lfn)
                        f['parents'].add(parent)

                if acdcFileList:
                    if f['lfn'] in [x['lfn'] for x in acdcFileList]:
                        totalJobs = self.createACDCJobs(f, acdcFileList, timePerEvent,
                                                        sizePerEvent, memoryRequirement,
                                                        lheInput, eventsPerJob, eventsPerLumi,
                                                        deterministicPileup, totalJobs)
                    continue
                if not f['lfn'].startswith("MCFakeFile"):
                    # Very very uncommon, but it has real input dataset
                    if eventsInFile >= eventsPerJob:
                        while currentEvent < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            if eventsPerJob + currentEvent < eventsInFile:
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob, currentEvent)
                            else:
                                jobTime = (eventsInFile - currentEvent) * timePerEvent
                                diskRequired = (eventsInFile - currentEvent) * sizePerEvent
                                self.currentJob["mask"].setMaxAndSkipEvents(None,
                                                                            currentEvent)
                            self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                                 memory=memoryRequirement,
                                                                 disk=diskRequired)
                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)

                            logging.debug("Job created for real input with %s", self.currentJob)
                            currentEvent += eventsPerJob
                            totalJobs += 1
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        logging.debug("Last job created for real input with %s", self.currentJob)
                        totalJobs += 1
                else:
                    # This assumes there's only one run which is the case for MC
                    lumis = runs[0].lumis
                    (firstLumi, lastLumi) = (min(lumis), max(lumis))
                    currentLumi = firstLumi
                    totalEvents = 0
                    if eventsInFile >= eventsPerJob:
                        while totalEvents < eventsInFile:
                            self.newJob(name=self.getJobName(length=totalJobs))
                            self.currentJob.addFile(f)
                            self.currentJob.addBaggageParameter("lheInputFiles", lheInput)
                            lumisPerJob = int(ceil(float(eventsPerJob)
                                                   / eventsPerLumi))
                            # Limit the number of events to a unsigned 32bit int
                            eventsRemaining = eventsInFile - totalEvents
                            if (currentEvent + eventsPerJob - 1) > (2 ** 32 - 1) and (
                                    currentEvent + eventsRemaining - 1) > (2 ** 32 - 1):
                                currentEvent = 1
                            if eventsRemaining > eventsPerJob:
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsPerJob,
                                                                            currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob,
                                                                           currentLumi)
                                jobTime = eventsPerJob * timePerEvent
                                diskRequired = eventsPerJob * sizePerEvent
                            else:
                                jobTime = eventsRemaining * timePerEvent
                                diskRequired = eventsRemaining * sizePerEvent
                                lumisPerJob = int(ceil(float(eventsRemaining) / eventsPerLumi))
                                self.currentJob["mask"].setMaxAndSkipEvents(eventsRemaining,
                                                                            currentEvent)
                                self.currentJob["mask"].setMaxAndSkipLumis(lumisPerJob,
                                                                           currentLumi)

                            if deterministicPileup:
                                self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                            currentLumi += lumisPerJob
                            currentEvent += eventsPerJob
                            totalEvents += eventsPerJob
                            totalJobs += 1
                            self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                                 memory=memoryRequirement,
                                                                 disk=diskRequired)
                    else:
                        self.newJob(name=self.getJobName(length=totalJobs))
                        self.currentJob.addFile(f)
                        # For MC we use firstEvent instead of skipEvents so set it to 1
                        # We must check for events going over 2**32 - 1 here too
                        if (eventsInFile + currentEvent - 1) > (2 ** 32 - 1):
                            currentEvent = 1
                        self.currentJob["mask"].setMaxAndSkipEvents(eventsInFile,
                                                                    currentEvent)
                        self.currentJob["mask"].setMaxAndSkipLumis(lastLumi -
                                                                   currentLumi + 1, currentLumi)
                        jobTime = eventsInFile * timePerEvent
                        diskRequired = eventsInFile * sizePerEvent
                        self.currentJob.addResourceEstimates(jobTime=jobTime,
                                                             memory=memoryRequirement,
                                                             disk=diskRequired)
                        if deterministicPileup:
                            self.currentJob.addBaggageParameter("skipPileupEvents", (self.nJobs - 1) * eventsPerJob)
                        totalJobs += 1

Пример #26

Показать файл

    def testB_AlgoMigration(self):
        """
        _AlgoMigration_

        Test our ability to migrate multiple algos to global

        Do this by creating, mid-poll, two separate batches of files
        One with the same dataset but a different algo
        One with the same algo, but a different dataset
        See that they both get to global
        """
        #raise nose.SkipTest
        myThread = threading.currentThread()
        config = self.createConfig()
        self.injectWorkflow(MaxWaitTime=20)
        name = "ThisIsATest_%s" % (makeUUID())
        tier = "RECO"
        nFiles = 12
        files = self.getFiles(name=name, tier=tier, nFiles=nFiles)
        datasetPath = '/%s/%s/%s' % (name, name, tier)

        # Load components that are necessary to check status
        factory = WMFactory("dbsUpload",
                            "WMComponent.DBSUpload.Database.Interface")
        dbinterface = factory.loadObject("UploadToDBS")

        dbsInterface = DBSInterface(config=config)
        localAPI = dbsInterface.getAPIRef()
        globeAPI = dbsInterface.getAPIRef(globalRef=True)

        testDBSUpload = DBSUploadPoller(config=config)
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 1)

        # Okay, by now, the first migration should have gone through.
        # Now create a second batch of files with the same dataset
        # but a different algo.
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch2-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName="cmsRun",
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH_PART2",
                                  configContent=self.configURL)
            testFile.setDatasetPath(datasetPath)
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Have to do things twice to get parents
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be two blocks
        result = listBlocks(apiRef=globeAPI, datasetPath=datasetPath)
        self.assertEqual(len(result), 2)

        # Now create another batch of files with the original algo
        # But in a different dataset
        for i in range(0, nFiles):
            testFile = DBSBufferFile(lfn='%s-batch3-%i' % (name, i),
                                     size=1024,
                                     events=20,
                                     checksums={'cksum': 1},
                                     locations="malpaquet")
            testFile.setAlgorithm(appName=name,
                                  appVer="CMSSW_3_1_1",
                                  appFam=tier,
                                  psetHash="GIBBERISH",
                                  configContent=self.configURL)
            testFile.setDatasetPath('/%s/%s_3/%s' % (name, name, tier))
            testFile.addRun(Run(1, *[46]))
            testFile.create()

        # Do it twice for parentage.
        testDBSUpload.algorithm()
        testDBSUpload.algorithm()

        # There should now be one block
        result = listBlocks(apiRef=globeAPI,
                            datasetPath='/%s/%s_3/%s' % (name, name, tier))
        self.assertEqual(len(result), 1)

        # Well, all the blocks got there, so we're done
        return

Пример #27

Показать файл

    def __call__(self, filesetToProcess):
        """
        The algorithm itself
        """
        global LOCK

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"), \
            os.getenv('DIALECT'), os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package = "WMCore.WMBS" , \
              logger = myThread.logger, \
              dbinterface = myThread.dbi)

        locationNew = daofactory(classname="Locations.New")
        getFileLoc = daofactory(classname="Files.GetLocation")
        fileInFileset = daofactory(classname="Files.InFileset")


        logging.debug("the T0Feeder is processing %s" % \
                 filesetToProcess.name)
        logging.debug("the fileset name %s" % \
         (filesetToProcess.name).split(":")[0])

        # Get the start Run if asked
        startRun = (filesetToProcess.name).split(":")[3]
        fileType = (filesetToProcess.name).split(":")[2]

        LASTIME = filesetToProcess.lastUpdate

        # url builder
        primaryDataset = ((filesetToProcess.name).split(":")[0]).split('/')[1]
        processedDataset = ((
            filesetToProcess.name).split(":")[0]).split('/')[2]
        dataTier = ((filesetToProcess.name\
            ).split(":")[0]).split('/')[3]
        url = "/tier0/listfilesoverinterval/%s/%s/%s/%s/%s" % \
              (fileType, LASTIME, primaryDataset,processedDataset, dataTier)

        tries = 1
        while True:

            try:

                myRequester = JSONRequests(url="vocms52.cern.ch:8889")
                requestResult = myRequester.get(\
              url+"/"+"?return_type=text/json%2Bdas")
                newFilesList = requestResult[0]["results"]

            except:

                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                else:
                    tries += 1
                    continue

            logging.debug("T0 queries done ...")
            now = time.time()
            LASTIME = int(newFilesList['end_time']) + 1

            break

        # process all files
        if len(newFilesList['files']):

            try:
                locationNew.execute(siteName="caf.cern.ch",
                                    seName="caf.cern.ch")
            except Exception as e:
                logging.debug("Error when adding new location...")
                logging.debug(e)
                logging.debug(format_exc())

            for files in newFilesList['files']:

                # Assume parents aren't asked
                newfile = File(str(files['lfn']), \
           size = files['file_size'], events = files['events'])

                try:

                    LOCK.acquire()

                    if newfile.exists() == False:
                        newfile.create()

                        for run in files['runs']:

                            runSet = set()
                            runSet.add(Run(run, *files['runs'][run]))
                            newfile.addRunSet(runSet)

                    else:
                        newfile.loadData()

                    fileLoc = getFileLoc.execute(file=files['lfn'])

                    if 'caf.cern.ch' not in fileLoc:
                        newfile.setLocation("caf.cern.ch")

#                    else:
#                        logging.debug("File already associated to %s" %fileLoc)

                    LOCK.release()

                    if len(newfile["runs"]):

                        val = 0
                        for run in newfile['runs']:

                            if run.run < int(startRun):
                                val = 1
                                break

                        if not val:

                            listFile = fileInFileset.execute\
                               (filesetToProcess.id)
                            if {'fileid': newfile['id']} not in listFile:

                                filesetToProcess.addFile(newfile)
                                filesetToProcess.setLastUpdate\
                              (int(newFilesList['end_time']) + 1)
                                filesetToProcess.commit()
                                logging.debug(
                                    "new file created/loaded added by T0AST..."
                                )

                except Exception as e:
                    logging.debug("Error when adding new files in T0AST...")
                    logging.debug(e)
                    logging.debug(format_exc())
                    LOCK.release()

            filesetToProcess.commit()

        else:

            logging.debug("nothing to do in T0AST...")
            # For reopned fileset or empty
            # try until the purge time is reached
            if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:

                filesetToProcess.setLastUpdate(time.time())
                filesetToProcess.commit()

        # Commit the fileset
        logging.debug("Test purge in T0AST ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")

        filesetToProcess.commit()

Пример #28

Показать файл

Файл: JobWorkUnit_t.py Проект: cronot99/WMCore

    def testCreateDeleteExists(self):
        """
        Create and then delete a job and workflow.  Use the workunit class's exists() method to
        determine if the workunit has been written to the database before the job is
        created, after the job has been created, and after the workflow has been deleted.
        """

        testWorkflow = Workflow(spec="spec.xml",
                                owner="Simon",
                                name="wf001",
                                task="Test")
        testWorkflow.create()

        testWMBSFileset = Fileset(name="TestFileset")
        testWMBSFileset.create()

        testSubscription = Subscription(fileset=testWMBSFileset,
                                        workflow=testWorkflow)
        testSubscription.create()

        testJobGroup = JobGroup(subscription=testSubscription)
        testJobGroup.create()

        testFileA = File(lfn="/this/is/a/lfnA", size=1024, events=10)
        testFileA.addRun(Run(1, *[45]))
        testFileB = File(lfn="/this/is/a/lfnB", size=1024, events=10)
        testFileB.addRun(Run(1, *[46]))

        testFileA.create()
        testFileB.create()

        testJob = Job(name="TestJob", files=[testFileA, testFileB])
        testWU1 = WorkUnit(taskID=testWorkflow.id,
                           fileid=testFileA['id'],
                           runLumi=Run(1, *[45]))
        testWU2 = WorkUnit(taskID=testWorkflow.id,
                           fileid=testFileB['id'],
                           runLumi=Run(1, *[46]))

        self.assertFalse(testWU1.exists(),
                         "WorkUnit exists before job was created")
        self.assertFalse(testWU2.exists(),
                         "WorkUnit exists before job was created")

        testJob.create(group=testJobGroup)

        self.assertTrue(testWU1.exists(),
                        "WorkUnit does not exist after job was created")
        self.assertTrue(testWU2.exists(),
                        "WorkUnit does not exist after job was created")

        testJob.delete()

        self.assertTrue(testWU1.exists(),
                        "WorkUnit does not exist after job is deleted")
        self.assertTrue(testWU2.exists(),
                        "WorkUnit does not exist after job is deleted")

        testWorkflow.delete()

        self.assertFalse(testWU1.exists(),
                         "WorkUnit exists after workflow is deleted")
        self.assertFalse(testWU2.exists(),
                         "WorkUnit exists after workflow is deleted")

        return

Пример #29

Показать файл

Файл: DBSBufferFile_t.py Проект: prozober/WMCore

    def testAddChildTransaction(self):
        """
        _testAddChildTransaction_

        Add a child to some parent files and make sure that all the parentage
        information is loaded/stored correctly from the database.  Rollback the
        addition of one of the childs and then verify that it does in fact only
        have one parent.
        """
        testFileParentA = DBSBufferFile(lfn="/this/is/a/parent/lfnA",
                                        size=1024,
                                        events=20)
        testFileParentA.setAlgorithm(appName="cmsRun",
                                     appVer="CMSSW_2_1_8",
                                     appFam="RECO",
                                     psetHash="GIBBERISH",
                                     configContent="MOREGIBBERISH")
        testFileParentA.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileParentA.addRun(Run(1, *[45]))

        testFileParentB = DBSBufferFile(lfn="/this/is/a/parent/lfnB",
                                        size=1024,
                                        events=20)
        testFileParentB.setAlgorithm(appName="cmsRun",
                                     appVer="CMSSW_2_1_8",
                                     appFam="RECO",
                                     psetHash="GIBBERISH",
                                     configContent="MOREGIBBERISH")
        testFileParentB.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileParentB.addRun(Run(1, *[45]))
        testFileParentA.create()
        testFileParentB.create()

        testFileA = DBSBufferFile(lfn="/this/is/a/lfn", size=1024, events=10)
        testFileA.setAlgorithm(appName="cmsRun",
                               appVer="CMSSW_2_1_8",
                               appFam="RECO",
                               psetHash="GIBBERISH",
                               configContent="MOREGIBBERISH")
        testFileA.setDatasetPath("/Cosmics/CRUZET09-PromptReco-v1/RECO")
        testFileA.addRun(Run(1, *[45]))
        testFileA.create()

        testFileParentA.addChildren("/this/is/a/lfn")

        myThread = threading.currentThread()
        myThread.transaction.begin()

        testFileParentB.addChildren("/this/is/a/lfn")

        testFileB = DBSBufferFile(id=testFileA["id"])
        testFileB.load(parentage=1)

        goldenFiles = [testFileParentA, testFileParentB]
        for parentFile in testFileB["parents"]:
            assert parentFile in goldenFiles, \
                   "ERROR: Unknown parent file"
            goldenFiles.remove(parentFile)

        assert len(goldenFiles) == 0, \
              "ERROR: Some parents are missing"

        myThread.transaction.rollback()
        testFileB.load(parentage=1)

        goldenFiles = [testFileParentA]
        for parentFile in testFileB["parents"]:
            assert parentFile in goldenFiles, \
                   "ERROR: Unknown parent file"
            goldenFiles.remove(parentFile)

        assert len(goldenFiles) == 0, \
              "ERROR: Some parents are missing"

        return

Пример #30

Показать файл

    def createJobsLocationWise(self, fileset, endOfRun, dqmHarvestUnit,
                               lumiMask, goodRunList):

        myThread = threading.currentThread()
        fileset.loadData(parentage=0)
        allFiles = fileset.getFiles()

        # sort by location and run
        locationDict = {}
        runDict = {}
        for fileInfo in allFiles:

            locSet = frozenset(fileInfo['locations'])
            runSet = fileInfo.getRuns()

            if len(locSet) == 0:
                logging.error("File %s has no locations!", fileInfo['lfn'])
            if len(runSet) == 0:
                logging.error("File %s has no run information!",
                              fileInfo['lfn'])

            # Populate a dictionary with [location][run] so we can split jobs according to those different combinations
            if locSet not in locationDict.keys():
                locationDict[locSet] = {}

            fileInfo['runs'] = set()
            # Handle jobs with run whitelist/blacklist
            if goodRunList:
                runDict[fileInfo['lfn']] = set()
                for run in runSet:
                    if run.run in goodRunList:
                        runDict[fileInfo['lfn']].add(run)
                        if run.run in locationDict[locSet].keys():
                            locationDict[locSet][run.run].append(fileInfo)
                        else:
                            locationDict[locSet][run.run] = [fileInfo]
            elif lumiMask:
                # it has lumiMask, thus we consider only good run/lumis
                newRunSet = []
                for run in runSet:
                    if not isGoodRun(lumiMask, run.run):
                        continue
                    # then loop over lumis
                    maskedLumis = []
                    for lumi in run.lumis:
                        if not isGoodLumi(lumiMask, run.run, lumi):
                            continue
                        maskedLumis.append(lumi)

                    if not maskedLumis:
                        continue
                    maskedRun = Run(run.run, *maskedLumis)
                    newRunSet.append(maskedRun)

                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]
                if newRunSet:
                    runDict[fileInfo['lfn']] = newRunSet
            else:
                # no LumiList and no run white or black list
                runDict[fileInfo['lfn']] = runSet
                for run in runSet:
                    if run.run in locationDict[locSet].keys():
                        locationDict[locSet][run.run].append(fileInfo)
                    else:
                        locationDict[locSet][run.run] = [fileInfo]

        # create separate jobs for different locations
        self.newGroup()
        self.jobCount = 0
        baseName = makeUUID()
        self.newGroup()

        if endOfRun:
            harvestType = "EndOfRun"
        else:
            harvestType = "Periodic"

        for location in locationDict.keys():

            if dqmHarvestUnit == "byRun":
                self.createJobByRun(locationDict, location, baseName,
                                    harvestType, runDict, endOfRun)
            else:
                self.createMultiRunJob(locationDict, location, baseName,
                                       harvestType, runDict, endOfRun)

        return