Example #1
    def pollForClosable(self):
        """
        _pollForClosable_

        Search WMBS for filesets that can be closed and mark them as closed.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        closableFilesetDAO = self.daoFactory(classname="Fileset.ListClosable")
        closableFilesets = closableFilesetDAO.execute()

        for closableFileset in closableFilesets:
            openFileset = Fileset(id=closableFileset)
            openFileset.load()

            logging.debug("Closing fileset %s" % openFileset.name)
            openFileset.markOpen(False)

        myThread.transaction.commit()
Example #2
    def pollForClosable(self):
        """
        _pollForClosable_

        Search WMBS for filesets that can be closed and mark them as closed.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()

        closableFilesetDAO = self.daoFactory(classname="Fileset.ListClosable")
        closableFilesets = closableFilesetDAO.execute()

        for closableFileset in closableFilesets:
            openFileset = Fileset(id=closableFileset)
            openFileset.load()

            logging.debug("Closing fileset %s", openFileset.name)
            openFileset.markOpen(False)

        myThread.transaction.commit()
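
Both examples wrap the whole poll in the thread-local WMBS transaction. One thing they do not show: if Fileset.load() or markOpen() raises, the transaction is left open. The sketch below is a hypothetical variant (not taken verbatim from WMCore) that rolls the transaction back on failure; it assumes the transaction object on the current thread exposes rollback(), as the WMCore-style Transaction class does.

    def pollForClosable(self):
        """
        Search WMBS for filesets that can be closed and mark them as closed,
        rolling the transaction back if anything goes wrong.
        """
        myThread = threading.currentThread()
        myThread.transaction.begin()
        try:
            closableFilesetDAO = self.daoFactory(classname="Fileset.ListClosable")
            for filesetId in closableFilesetDAO.execute():
                openFileset = Fileset(id=filesetId)
                openFileset.load()
                logging.debug("Closing fileset %s", openFileset.name)
                openFileset.markOpen(False)
        except Exception:
            # undo any partially applied closings before re-raising
            myThread.transaction.rollback()
            raise
        myThread.transaction.commit()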
Example #3
class RepackMergeTest(unittest.TestCase):
    """
    _RepackMergeTest_
    Test for RepackMerge job splitter
    """

    def setUp(self):
        """
        _setUp_
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()

        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 1 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 2 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 3 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 4 },
                              transaction = False)
        insertLumiDAO.execute(binds = { 'RUN' : 1,
                                        'LUMI' : 5 },
                              transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertCMSSWVersionDAO.execute(binds = { 'VERSION' : "CMSSW_4_2_7" },
                                      transaction = False)

        insertStreamCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertStreamCMSSWVersion")
        insertStreamCMSSWVersionDAO.execute(binds = { 'RUN' : 1,
                                                      'STREAM' : 'A',
                                                      'VERSION' : "CMSSW_4_2_7" },
                                            transaction = False)

        insertStreamerDAO = daoFactory(classname = "RunConfig.InsertStreamer")
        insertStreamerDAO.execute(streamerPNN = "SomePNN",
                                  binds = { 'RUN' : 1,
                                            'P5_ID' : 1,
                                            'LUMI' : 4,
                                            'STREAM' : "A",
                                            'LFN' : "/testLFN/A",
                                            'FILESIZE' : 100,
                                            'EVENTS' : 100,
                                            'TIME' : int(time.time()) },
                                  transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        self.fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "RepackMerge",
                                           type = "RepackMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.feedStreamersDAO = daoFactory(classname = "Tier0Feeder.FeedStreamers")
        self.acquireFilesDAO = wmbsDaoFactory(classname = "Subscriptions.AcquireFiles")
        self.completeFilesDAO = wmbsDaoFactory(classname = "Subscriptions.CompleteFiles")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['minInputSize'] = 2.1 * 1024 * 1024 * 1024
        self.splitArgs['maxInputSize'] = 4.0 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 100000000
        self.splitArgs['maxInputFiles'] = 1000
        self.splitArgs['maxEdmSize'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxOverSize'] = 10 * 1024 * 1024 * 1024
        self.splitArgs['maxLatency'] = 50000

        return

    def tearDown(self):
        """
        _tearDown_
        """
        self.testInit.clearDatabase()

        return

    def deleteSplitLumis(self):
        """
        _deleteSplitLumis_
        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""DELETE FROM lumi_section_split_active
                                    """,
                                 transaction = False)

        return

    def test00(self):
        """
        _test00_
        Test that the job name prefix feature works
        Test max edm size threshold for single lumi
        small lumi, followed by an over-large lumi
        expect 1 job for the small lumi and 2 jobs for the over-large one
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2 * lumi):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxEdmSize'] = 13000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("RepackMerge-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 3,
                         "ERROR: Job does not process 3 files")

        job = jobGroups[0].jobs[2]
        self.assertEqual(len(job.getFiles()), 1,
                         "ERROR: Job does not process 1 file")

        return

    def test01(self):
        """
        _test01_
        Test max size threshold for single lumi
        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test02(self):
        """
        _test02_
        Test max event threshold for single lumi
        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100 * lumi)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 300
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test03(self):
        """
        _test03_
        Test max input files threshold for single lumi
        small lumi, followed by large lumi
        expect 1 job for small lumi and 1 job for large
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(lumi * 2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 3
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test04(self):
        """
        _test04_
        Test max size threshold for multi lumi
        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputSize'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test05(self):
        """
        _test05_
        Test max event threshold for multi lumi
        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test06(self):
        """
        _test06_
        Test max input files threshold for multi lumi
        3 same size lumis
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test07(self):
        """
        _test07_
        Test over merge
        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return

    def test08(self):
        """
        _test08_
        Test under merge (over merge size threshold)
        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 3000
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test09(self):
        """
        _test09_
        Test under merge (over merge event threshold)
        one small lumi, one large lumi (small below min size,
        large below max size, but both together above max size)
        
        The test was changed because maxInputEvents is no longer used.
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000 * lumi * lumi, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 1500
        mySplitArgs['maxInputSize'] = 9000
        mySplitArgs['maxOverSize'] = 9500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.fileset2.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        return

    def test10(self):
        """
        _test10_
        Test merging of multiple lumis with holes in the lumi sequence
        Hole is due to no streamer files for the lumi
        Multi lumi input
        
        This only works with a single hole, since a merged file is created
        even when it is smaller than minInputSize.

        The test was changed because maxInputEvents is no longer used.
        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 4]:
            for i in range(2):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset2.addFile(newFile)
        self.fileset2.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription2)

        mySplitArgs['minInputSize'] = 100000
        mySplitArgs['maxInputSize'] = 200000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        return
Example #4
class RepackTest(unittest.TestCase):
    """
    _RepackTest_

    Test for Repack job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE2')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'TIME' : int(time.time()),
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4]:
            insertLumiDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "A" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Repack",
                                           type = "Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxSizeSingleLumi'] = 20*1024*1024*1024
        self.splitArgs['maxSizeMultiLumi'] = 10*1024*1024*1024
        self.splitArgs['maxInputEvents'] = 500000
        self.splitArgs['maxInputFiles'] = 1000

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()

        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """, transaction = False)[0].fetchall()

        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)

        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeMultiLumi'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test multi lumi event threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test02(self):
        """
        _test02_

        Test single lumi size threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeSingleLumi'] = 6500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test04(self):
        """
        _test04_

        Test streamer count threshold (only multi lumi)
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = { 'RUN' : 1,
                                                   'LUMI' : 3,
                                                   'STREAM' : "A",
                                                   'FILECOUNT' : 0,
                                                   'INSERT_TIME' : self.currentTime,
                                                   'CLOSE_TIME' : self.currentTime },
                                         transaction = False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")


        return

    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size = 1000, events = nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "A",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
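
The closed-lumi bind dictionaries in the tests above are built inline with the same five fields every time. As a purely illustrative refactoring (the helper below is hypothetical, not part of the T0 test suite), the construction could be factored out into a small module-level function:

def makeClosedLumiBind(run, lumi, stream, filecount, currentTime):
    """
    Build one bind dictionary for the RunLumiCloseout.InsertClosedLumi DAO,
    mirroring the fields the tests above pass inline.
    """
    return { 'RUN' : run,
             'LUMI' : lumi,
             'STREAM' : stream,
             'FILECOUNT' : filecount,
             'INSERT_TIME' : currentTime,
             'CLOSE_TIME' : currentTime }

A test would then append makeClosedLumiBind(1, lumi, "A", filecount, self.currentTime) to insertClosedLumiBinds and pass the list to self.insertClosedLumiDAO.execute() unchanged.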
Example #5
def configureRunStream(tier0Config, run, stream, specDirectory,
                       dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package="T0.WMBS",
                            logger=logging,
                            dbinterface=myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname="RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction=False)[0]

    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    if runInfo['hltkey'] is not None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(
                classname="RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run,
                                                    stream,
                                                    transaction=False)
            if len(datasets) == 0:
                raise RuntimeError(
                    "Stream is not defined in HLT menu or has no datasets !")

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(
            classname="RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(
            classname="RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(
            classname="RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(
            classname="RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(
            classname="RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(
            classname="RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(
            classname="RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(
            classname="RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(
            classname="RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(
            classname="RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(
            classname="RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(
            classname="RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(
            classname="RunConfig.InsertPhEDExConfig")

        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {
            'RUN': run,
            'STREAM': stream,
            'STYLE': streamConfig.ProcessingStyle
        }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package="WMCore.WMBS",
                                    logger=logging,
                                    dbinterface=myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(
            classname="Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(
            classname="RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run,
                                                          stream,
                                                          transaction=False)

        if streamConfig.ProcessingStyle == "Bulk":

            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Repack.CMSSWVersion})

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Repack.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Repack.ProcessingVersion,
                'MAX_SIZE_SINGLE_LUMI': streamConfig.Repack.MaxSizeSingleLumi,
                'MAX_SIZE_MULTI_LUMI': streamConfig.Repack.MaxSizeMultiLumi,
                'MIN_SIZE': streamConfig.Repack.MinInputSize,
                'MAX_SIZE': streamConfig.Repack.MaxInputSize,
                'MAX_EDM_SIZE': streamConfig.Repack.MaxEdmSize,
                'MAX_OVER_SIZE': streamConfig.Repack.MaxOverSize,
                'MAX_EVENTS': streamConfig.Repack.MaxInputEvents,
                'MAX_FILES': streamConfig.Repack.MaxInputFiles,
                'BLOCK_DELAY': streamConfig.Repack.BlockCloseDelay,
                'CMSSW': streamConfig.Repack.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Repack.ScramArch
            }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append({'PRIMDS': specialDataset})
            bindsStreamDataset.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'STREAM': stream
            })
            bindsSpecialDataset = {'STREAM': stream, 'PRIMDS': specialDataset}
            bindsDatasetScenario.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'SCENARIO': streamConfig.Express.Scenario
            })

            if streamConfig.Express.WriteDQM:
                outputModuleDetails.append({
                    'dataTier': tier0Config.Global.DQMDataTier,
                    'eventContent': tier0Config.Global.DQMDataTier,
                    'primaryDataset': specialDataset
                })

            bindsStorageNode.append({'NODE': expressPhEDExSubscribeNode})

            bindsPhEDExConfig.append({
                'RUN': run,
                'PRIMDS': specialDataset,
                'ARCHIVAL_NODE': None,
                'TAPE_NODE': None,
                'DISK_NODE': expressPhEDExSubscribeNode
            })

            subscriptions.append({
                'custodialSites': [],
                'nonCustodialSites': [expressPhEDExSubscribeNode],
                'autoApproveSites': [expressPhEDExSubscribeNode],
                'priority': "high",
                'primaryDataset': specialDataset
            })

            alcaSkim = None
            if len(streamConfig.Express.AlcaSkims) > 0:
                outputModuleDetails.append({
                    'dataTier': "ALCARECO",
                    'eventContent': "ALCARECO",
                    'primaryDataset': specialDataset
                })
                alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                numPromptCalibProd = 0
                for producer in streamConfig.Express.AlcaSkims:
                    if producer.startswith("PromptCalibProd"):
                        numPromptCalibProd += 1

                if numPromptCalibProd > 0:
                    bindsPromptCalibration = {
                        'RUN': run,
                        'STREAM': stream,
                        'NUM_PRODUCER': numPromptCalibProd
                    }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(
                onlineVersion, onlineVersion)

            bindsCMSSWVersion.append(
                {'VERSION': streamConfig.Express.CMSSWVersion})

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(
                streamConfig.Express.CMSSWVersion,
                tier0Config.Global.DefaultScramArch)

            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:

                bindsCMSSWVersion.append(
                    {'VERSION': streamConfig.Express.RecoCMSSWVersion})

                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(
                    streamConfig.Express.RecoCMSSWVersion,
                    tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = {
                'RUN': run,
                'STREAM': stream,
                'PROC_VER': streamConfig.Express.ProcessingVersion,
                'WRITE_TIERS': ",".join(streamConfig.Express.DataTiers),
                'WRITE_DQM': streamConfig.Express.WriteDQM,
                'GLOBAL_TAG': streamConfig.Express.GlobalTag,
                'MAX_RATE': streamConfig.Express.MaxInputRate,
                'MAX_EVENTS': streamConfig.Express.MaxInputEvents,
                'MAX_SIZE': streamConfig.Express.MaxInputSize,
                'MAX_FILES': streamConfig.Express.MaxInputFiles,
                'MAX_LATENCY': streamConfig.Express.MaxLatency,
                'DQM_INTERVAL': streamConfig.Express.PeriodicHarvestInterval,
                'BLOCK_DELAY': streamConfig.Express.BlockCloseDelay,
                'CMSSW': streamConfig.Express.CMSSWVersion,
                'SCRAM_ARCH': streamConfig.Express.ScramArch,
                'RECO_CMSSW': streamConfig.Express.RecoCMSSWVersion,
                'RECO_SCRAM_ARCH': streamConfig.Express.RecoScramArch,
                'MULTICORE': streamConfig.Express.Multicore,
                'ALCA_SKIM': alcaSkim,
                'DQM_SEQ': dqmSeq
            }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(
            classname="RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(
            run, stream, transaction=False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                if stream == "Express" and run in [
                        210114, 210116, 210120, 210121, 210178
                ]:
                    continue
                if stream == "A" and run in [216120, 216125, 216130]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append({
                    'dataTier': "RAW",
                    'eventContent': "ALL",
                    'selectEvents': selectEvents,
                    'primaryDataset': dataset
                })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': datasetConfig.ArchivalNode,
                    'TAPE_NODE': datasetConfig.TapeNode,
                    'DISK_NODE': datasetConfig.DiskNode
                })

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append(
                        {'NODE': datasetConfig.ArchivalNode})
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.TapeNode})
                    custodialSites.append(datasetConfig.TapeNode)
                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append({'NODE': datasetConfig.DiskNode})
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': dataset,
                        'dataTier': "RAW"
                    })

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append({
                        'custodialSites': custodialSites,
                        'custodialSubType': "Replica",
                        'nonCustodialSites': nonCustodialSites,
                        'autoApproveSites': autoApproveSites,
                        'priority': "high",
                        'primaryDataset': "%s-Error" % dataset,
                        'dataTier': "RAW"
                    })

            elif streamConfig.ProcessingStyle == "Express":

                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in ["ALCARECO", "DQM", "DQMIO"]:

                        outputModuleDetails.append({
                            'dataTier': dataTier,
                            'eventContent': dataTier,
                            'selectEvents': selectEvents,
                            'primaryDataset': dataset
                        })

                bindsPhEDExConfig.append({
                    'RUN': run,
                    'PRIMDS': dataset,
                    'ARCHIVAL_NODE': None,
                    'TAPE_NODE': None,
                    'DISK_NODE': expressPhEDExSubscribeNode
                })

                subscriptions.append({
                    'custodialSites': [],
                    'nonCustodialSites': [expressPhEDExSubscribeNode],
                    'autoApproveSites': [expressPhEDExSubscribeNode],
                    'priority': "high",
                    'primaryDataset': dataset
                })

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                            runInfo['bulk_data_type'])
            else:
                specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:

            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = ["T2_CH_CERN_T0"]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(
                workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
            wmSpec.setOwnerDetails(
                "*****@*****.**", "T0", {
                    'vogroup': 'DEFAULT',
                    'vorole': 'DEFAULT',
                    'dn': "*****@*****.**"
                })

            wmSpec.setupPerformanceMonitoring(maxRSS=10485760,
                                              maxVSize=10485760,
                                              softTimeout=604800,
                                              gracePeriod=3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath=specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(
                    bindsPromptCalibration,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset,
                                                conn=myThread.transaction.conn,
                                                transaction=True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(
                    bindsDatasetScenario,
                    conn=myThread.transaction.conn,
                    transaction=True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode,
                                             conn=myThread.transaction.conn,
                                             transaction=True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig,
                                              conn=myThread.transaction.conn,
                                              transaction=True)
            insertStreamStyleDAO.execute(bindsStreamStyle,
                                         conn=myThread.transaction.conn,
                                         transaction=True)
            if streamConfig.ProcessingStyle in ['Bulk', 'Express']:
                insertStreamFilesetDAO.execute(run,
                                               stream,
                                               filesetName,
                                               conn=myThread.transaction.conn,
                                               transaction=True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName),
                                              fileset,
                                              alternativeFilesetClose=True)
                insertWorkflowMonitoringDAO.execute(
                    [fileset.id],
                    conn=myThread.transaction.conn,
                    transaction=True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append({
                        'RUN': run,
                        'PRIMDS': primds,
                        'FILESET': fileset,
                        'RECODELAY': promptRecoDelay[primds],
                        'RECODELAYOFFSET': promptRecoDelayOffset[primds]
                    })
                insertRecoReleaseConfigDAO.execute(
                    bindsRecoReleaseConfig,
                    conn=myThread.transaction.conn,
                    transaction=True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute(
                    [workflowName],
                    injected=True,
                    conn=myThread.transaction.conn,
                    transaction=True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError(
                "Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass
    return
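For reference, the database section above follows a single-transaction pattern: every DAO call is bound to myThread.transaction.conn, and the whole batch is committed only if no call raises. Below is a minimal sketch of that pattern, assuming a WMCore-style thread that already carries a transaction object; the executeInOneTransaction name and its daoCallsWithBinds argument are hypothetical placeholders standing in for the real DAOs and binds.

import logging
import threading


def executeInOneTransaction(daoCallsWithBinds):
    """
    Sketch only: run a list of (dao, binds) pairs inside a single
    transaction, rolling everything back if any call fails.
    """
    myThread = threading.currentThread()
    try:
        myThread.transaction.begin()
        for dao, binds in daoCallsWithBinds:
            if binds:  # skip empty bind lists/dicts, as configureRunStream does
                dao.execute(binds,
                            conn = myThread.transaction.conn,
                            transaction = True)
    except Exception as ex:
        logging.exception(ex)
        myThread.transaction.rollback()
        raise
    else:
        myThread.transaction.commit()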
Example #6
class JobGroup(WMBSBase, WMJobGroup):
    """
    A group (set) of Jobs
    """
    def __init__(self, subscription = None, jobs = None, id = -1, uid = None, location = None):
        WMBSBase.__init__(self)
        WMJobGroup.__init__(self, subscription=subscription, jobs = jobs)

        self.id = id
        self.lastUpdate = None
        self.uid = uid

        if location != None:
            self.setSite(location)

        return

    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        #overwrite base class self.output for WMBS fileset
        self.output = Fileset(name = makeUUID())
        self.output.create()

        if self.uid == None:
            self.uid = makeUUID()

        action = self.daofactory(classname = "JobGroup.New")
        action.execute(self.uid, self.subscription["id"],
                       self.output.id, conn = self.getDBConn(),
                       transaction = self.existingTransaction())

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return

    def delete(self):
        """
        Remove a jobgroup from WMBS
        """
        deleteAction = self.daofactory(classname = "JobGroup.Delete")
        deleteAction.execute(id = self.id, conn = self.getDBConn(),
                             transaction = self.existingTransaction())

        return

    def exists(self):
        """
        Check whether a jobgroup exists, using the id if set and falling
        back to the uid otherwise. Return the id.
        """
        if self.id != -1:
            action = self.daofactory(classname = "JobGroup.ExistsByID")
            result =  action.execute(id = self.id, conn = self.getDBConn(),
                                     transaction = self.existingTransaction())
        else:
            action = self.daofactory(classname = "JobGroup.Exists")
            result = action.execute(uid = self.uid, conn = self.getDBConn(),
                                    transaction = self.existingTransaction())

        return result

    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output fileset
        id.  Either the JobGroup id or uid must be specified for this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname = "JobGroup.LoadFromID")
            result = loadAction.execute(self.id, conn = self.getDBConn(),
                                        transaction = self.existingTransaction())
        else:
            loadAction = self.daofactory(classname = "JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid, conn = self.getDBConn(),
                                        transaction = self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        self.subscription = Subscription(id = result["subscription"])
        self.subscription.load()

        self.output = Fileset(id = result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return

    def loadData(self):
        """
        _loadData_

        Load all data that is associated with the jobgroup.  This includes
        loading all the subscription information, the output fileset
        information and all the jobs that are associated with the group.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid == None:
            self.load()

        self.subscription.loadData()
        self.output.loadData()

        loadAction = self.daofactory(classname = "JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn = self.getDBConn(),
                                    transaction = self.existingTransaction())

        self.jobs = []
        self.newjobs = []

        for jobID in result:
            newJob = Job(id = jobID["id"])
            newJob.loadData()
            self.add(newJob)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def commit(self):
        """
        _commit_

        Write any new jobs to the database, creating them in the database if
        necessary.
        """
        existingTransaction = self.beginTransaction()

        if self.id == -1:
            self.create()

        for j in self.newjobs:
            j.create(group = self)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return


    def setSite(self, site_name = None):
        """
        Updates the jobGroup with a site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname = "JobGroup.SetSite")
        result = action.execute(site_name = site_name, jobGroupID = self.id,
                                conn = self.getDBConn(), transaction = self.existingTransaction())

        return result


    def getSite(self):
        """
        Retrieves the jobGroup's site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname = "JobGroup.GetSite")
        result = action.execute(jobGroupID = self.id, conn = self.getDBConn(),
                                transaction = self.existingTransaction())

        return result

    def listJobIDs(self):
        """
        Returns a list of job IDs.
        Useful when the loading of jobGroups is threaded, where running loadData() can overload UUID.
        """

        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid == None:
            self.load()

        loadAction = self.daofactory(classname = "JobGroup.LoadJobs")
        result = loadAction.execute(self.id, conn = self.getDBConn(),
                                    transaction = self.existingTransaction())

        jobIDList = []

        for jobID in result:
            jobIDList.append(jobID["id"])

        self.commitTransaction(existingTransaction)
        return jobIDList


    def commitBulk(self):
        """
        Creates jobs in a group instead of singly, as is done in jobGroup.commit()
        """

        myThread = threading.currentThread()

        if self.id == -1:
            myThread.transaction.begin()
            #existingTransaction = self.beginTransaction()
            self.create()
            #self.commitTransaction(existingTransaction)
            myThread.transaction.commit()

        existingTransaction = self.beginTransaction()

        listOfJobs = []
        for job in self.newjobs:
            #First do all the header stuff
            if job["id"] != None:
                continue

            job["jobgroup"] = self.id

            if job["name"] == None:
                job["name"] = makeUUID()

            listOfJobs.append(job)

        bulkAction = self.daofactory(classname = "Jobs.New")
        result = bulkAction.execute(jobList = listOfJobs)

        #Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        for job in listOfJobs:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            for file in job['input_files']:
                fileDict[job['id']].append(file['id'])

        maskAction = self.daofactory(classname = "Masks.New")
        maskAction.execute(jobList = listOfJobs, conn = self.getDBConn(),
                           transaction = self.existingTransaction())

        fileAction = self.daofactory(classname = "Jobs.AddFiles")
        fileAction.execute(jobDict = fileDict, conn = self.getDBConn(),
                           transaction = self.existingTransaction())


        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)

        return


    def getLocationsForJobs(self):
        """
        Gets a list of the locations that jobs can run at
        """
        if not self.exists():
            return

        action = self.daofactory(classname = "JobGroup.GetLocationsForJobs")
        result = action.execute(id = self.id, conn = self.getDBConn(),
                                transaction = self.existingTransaction())

        return result


    def __str__(self):
        """
        __str__

        Print out some information about the jobGroup
        as if jobGroup inherited from dict()
        """

        d = {'id': self.id, 'uid': self.uid, 'subscription': self.subscription,
             'output': self.output, 'jobs': self.jobs,
             'newjobs': self.newjobs}

        return str(d)
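A rough usage sketch for the JobGroup class above, assuming an existing, loaded WMBS Subscription and some chunks of WMBS File objects prepared elsewhere; the subscription and fileChunks names below are placeholders, not part of the original code.

from WMCore.WMBS.Job import Job
from WMCore.WMBS.JobGroup import JobGroup

# subscription: an existing, loaded WMBS Subscription (placeholder)
# fileChunks: iterable of lists of WMBS File objects (placeholder)

jobGroup = JobGroup(subscription = subscription)
jobGroup.create()                      # inserts the group and creates its output fileset

for files in fileChunks:
    jobGroup.add(Job(files = files))   # queued in newjobs until commit

jobGroup.commitBulk()                  # bulk insert; jobGroup.commit() creates jobs one by one
print(jobGroup.listJobIDs())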
Example #7
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (have an HLT key) different from local runs
    #
    if runInfo['hltkey'] != None:

        # streams not explicitely configured are repacked
        if stream not in tier0Config.Streams.dictionary_().keys():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # consistency check to make sure stream exists and has datasets defined
        # only run if we don't ignore the stream
        if streamConfig.ProcessingStyle != "Ignore":
            getStreamDatasetsDAO = daoFactory(classname = "RunConfig.GetStreamDatasets")
            datasets = getStreamDatasetsDAO.execute(run, stream, transaction = False)
            if len(datasets) == 0:
                raise RuntimeError("Stream is not defined in HLT menu or has no datasets !")


        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        bindsCMSSWVersion = []
        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # special dataset for some express output
        #
        specialDataset = None

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = []

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            streamConfig.Repack.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Repack.CMSSWVersion } )

            streamConfig.Repack.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Repack.CMSSWVersion,
                                                                               tier0Config.Global.DefaultScramArch)

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles,
                                  'BLOCK_DELAY' : streamConfig.Repack.BlockCloseDelay,
                                  'CMSSW' : streamConfig.Repack.CMSSWVersion,
                                  'SCRAM_ARCH' : streamConfig.Repack.ScramArch }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'ARCHIVAL_NODE' : None,
                                        'TAPE_NODE' : None,
                                        'DISK_NODE' : expressPhEDExSubscribeNode } )

            subscriptions.append( { 'custodialSites' : [],
                                    'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                    'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                    'priority' : "high",
                                    'primaryDataset' : specialDataset } )

            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    numPromptCalibProd = 0
                    for producer in streamConfig.Express.AlcaSkims:
                        if producer.startswith("PromptCalibProd"):
                            numPromptCalibProd += 1

                    if numPromptCalibProd > 0:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream,
                                                   'NUM_PRODUCER' : numPromptCalibProd }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            streamConfig.Express.CMSSWVersion = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)

            bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.CMSSWVersion } )

            streamConfig.Express.ScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.CMSSWVersion,
                                                                                tier0Config.Global.DefaultScramArch)
            
            streamConfig.Express.RecoScramArch = None
            if streamConfig.Express.RecoCMSSWVersion != None:

                bindsCMSSWVersion.append( { 'VERSION' : streamConfig.Express.RecoCMSSWVersion } )

                streamConfig.Express.RecoScramArch = tier0Config.Global.ScramArches.get(streamConfig.Express.RecoCMSSWVersion,
                                                                                        tier0Config.Global.DefaultScramArch)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_RATE' : streamConfig.Express.MaxInputRate,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'DQM_INTERVAL' : streamConfig.Express.PeriodicHarvestInterval,
                                   'BLOCK_DELAY' : streamConfig.Express.BlockCloseDelay,
                                   'CMSSW' : streamConfig.Express.CMSSWVersion,
                                   'SCRAM_ARCH' : streamConfig.Express.ScramArch,
                                   'RECO_CMSSW' : streamConfig.Express.RecoCMSSWVersion,
                                   'RECO_SCRAM_ARCH' : streamConfig.Express.RecoScramArch,
                                   'MULTICORE' : streamConfig.Express.Multicore,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

        for dataset, paths in datasetTriggers.items():

            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue
                if stream == "A" and run in [ 216120, 216125, 216130 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : datasetConfig.ArchivalNode,
                                            'TAPE_NODE' : datasetConfig.TapeNode,
                                            'DISK_NODE' : datasetConfig.DiskNode } )

                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.TapeNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.TapeNode } )
                    custodialSites.append(datasetConfig.TapeNode)
                if datasetConfig.DiskNode != None:
                    bindsStorageNode.append( { 'NODE' : datasetConfig.DiskNode } )
                    nonCustodialSites.append(datasetConfig.DiskNode)
                    autoApproveSites.append(datasetConfig.DiskNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : dataset,
                                            'dataTier' : "RAW" } )

                #
                # set subscriptions for error dataset
                #
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []
                if datasetConfig.ArchivalNode != None:
                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)
                if datasetConfig.ArchivalNode != expressPhEDExInjectNode:
                    nonCustodialSites.append(expressPhEDExInjectNode)
                    autoApproveSites.append(expressPhEDExInjectNode)

                if len(custodialSites) > 0 or len(nonCustodialSites) > 0:
                    subscriptions.append( { 'custodialSites' : custodialSites,
                                            'custodialSubType' : "Replica",
                                            'nonCustodialSites' : nonCustodialSites,
                                            'autoApproveSites' : autoApproveSites,
                                            'priority' : "high",
                                            'primaryDataset' : "%s-Error" % dataset,
                                            'dataTier' : "RAW" } )


            elif streamConfig.ProcessingStyle == "Express":

                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:

                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'ARCHIVAL_NODE' : None,
                                            'TAPE_NODE' : None,
                                            'DISK_NODE' : expressPhEDExSubscribeNode } )

                subscriptions.append( { 'custodialSites' : [],
                                        'nonCustodialSites' : [ expressPhEDExSubscribeNode ],
                                        'autoApproveSites' : [ expressPhEDExSubscribeNode ],
                                        'priority' : "high",
                                        'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":

            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"

            specArguments['TimePerEvent'] = 1
            specArguments['SizePerEvent'] = 200
            specArguments['Memory'] = 1800

            specArguments['RequestPriority'] = 0

            specArguments['CMSSWVersion'] = streamConfig.Repack.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Repack.ScramArch

            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles

            specArguments['UnmergedLFNBase'] = "/store/unmerged/%s" % runInfo['bulk_data_type']
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/%s" % (runInfo['backfill'],
                                                                            runInfo['bulk_data_type'])
            else:
                specArguments['MergedLFNBase'] = "/store/%s" % runInfo['bulk_data_type']

            specArguments['BlockCloseDelay'] = streamConfig.Repack.BlockCloseDelay

        elif streamConfig.ProcessingStyle == "Express":

            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)

            specArguments = {}

            specArguments['Group'] = "unknown"
            specArguments['Requestor'] = "unknown"
            specArguments['RequestorDN'] = "unknown"

            specArguments['TimePerEvent'] = 12
            specArguments['SizePerEvent'] = 512
            specArguments['Memory'] = 1800

            if streamConfig.Express.Multicore:
                specArguments['Multicore'] = streamConfig.Express.Multicore
                specArguments['Memory'] = 1800 * streamConfig.Express.Multicore

            specArguments['RequestPriority'] = 0

            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['Scenario'] = streamConfig.Express.Scenario

            specArguments['CMSSWVersion'] = streamConfig.Express.CMSSWVersion
            specArguments['ScramArch'] = streamConfig.Express.ScramArch
            specArguments['RecoCMSSWVersion'] = streamConfig.Express.RecoCMSSWVersion
            specArguments['RecoScramArch'] = streamConfig.Express.RecoScramArch

            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['GlobalTagConnect'] = streamConfig.Express.GlobalTagConnect

            specArguments['MaxInputRate'] = streamConfig.Express.MaxInputRate
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream
            specArguments['SpecialDataset'] = specialDataset

            specArguments['UnmergedLFNBase'] = "/store/unmerged/express"
            specArguments['MergedLFNBase'] = "/store/express"
            if runInfo['backfill']:
                specArguments['MergedLFNBase'] = "/store/backfill/%s/express" % runInfo['backfill']
            else:
                specArguments['MergedLFNBase'] = "/store/express"

            specArguments['PeriodicHarvestInterval'] = streamConfig.Express.PeriodicHarvestInterval

            specArguments['BlockCloseDelay'] = streamConfig.Express.BlockCloseDelay

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:

            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/cvmfs/cms.cern.ch/SITECONF/T2_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

            specArguments['SiteWhitelist'] = [ "T2_CH_CERN_T0" ]
            specArguments['SiteBlacklist'] = []

        if streamConfig.ProcessingStyle == "Bulk":
            factory = RepackWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            factory = ExpressWorkloadFactory()
            wmSpec = factory.factoryWorkloadConstruction(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions:
                wmSpec.setSubscriptionInformation(**subscription)

        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                              softTimeout = 604800, gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
        #
        try:
            myThread.transaction.begin()
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id],  conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except Exception as ex:
            logging.exception(ex)
            myThread.transaction.rollback()
            raise RuntimeError("Problem in configureRunStream() database transaction !")
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs ?
        pass
    return
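
The snippet above wraps all RunConfig inserts in a single begin / commit-or-rollback transaction. Below is a minimal, self-contained sketch of that pattern; the Transaction class and runInTransaction helper are illustrative stand-ins, not the WMCore API.

import logging

class Transaction(object):
    """Stand-in transaction object with the begin/commit/rollback interface assumed above."""
    def begin(self):
        logging.debug("transaction begin")
    def commit(self):
        logging.debug("transaction commit")
    def rollback(self):
        logging.debug("transaction rollback")

def runInTransaction(transaction, operations):
    """Run a list of callables inside one transaction, rolling back on any error."""
    transaction.begin()
    try:
        for operation in operations:
            operation()
    except Exception as ex:
        logging.exception(ex)
        transaction.rollback()
        raise RuntimeError("Problem in database transaction!")
    else:
        transaction.commit()

# usage: runInTransaction(Transaction(), [lambda: logging.debug("insert binds")])
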
Пример #8
0
    def databaseWork(self):
        """
        completed, set the fileset to close (Not implemented yet)
        """
        fileset_watch_temp = []
        listFileset = {}
        fileset_new = {}

        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Get All managed filesets
        managedFilesets = self.getAllManagedFilesets.execute(
            conn = myThread.transaction.conn, transaction = True)

        myThread.transaction.commit()

        logging.debug("Found %s managed filesets" % len(managedFilesets))

        for managedFileset in managedFilesets:

            logging.debug("Processing %s %s" %
                          (managedFileset['id'], managedFileset['name']))

            filesetToUpdate = Fileset(id=managedFileset['id'])
            filesetToUpdate.load()

            if managedFileset['name'] not in self.fileset_watch:

                self.fileset_watch[filesetToUpdate.name] = filesetToUpdate
                fileset_new[filesetToUpdate.name] = filesetToUpdate

            listFileset[filesetToUpdate.name] = filesetToUpdate

        # Update the list of the fileset to watch
        for oldFileset in self.fileset_watch:

            if oldFileset not in listFileset:

                fileset_watch_temp.append(oldFileset)
        # Remove from the list of the fileset to update the ones which are not
        # in ManagedFilesets anymore
        for oldTempFileset in fileset_watch_temp:
            del self.fileset_watch[oldTempFileset]

        logging.debug("NEW FILESETS %s" %fileset_new)
        logging.debug("OLD FILESETS %s" %self.fileset_watch)

        # WorkQueue work
        for name, fileset in fileset_new.items():

            logging.debug("Will poll %s : %s" % (name, fileset.id))
            self.workq.enqueue(name, fileset)

        for key, filesets in self.workq:

            fileset = self.fileset_watch[key]
            logging.debug("the poll key %s result %s is ready!" %
                          (key, str(fileset.id)))

            myThread.transaction.begin()

            feederId = self.getFeederId.execute(
                feederType = fileset.name.split(":")[1],
                conn = myThread.transaction.conn, transaction = True)

            myThread.transaction.commit()

            logging.debug("the Feeder %s has processed %s and is "
                          "removing it if closed" % (feederId, fileset.name))

            # If the fileset is closed, remove it from the managed filesets
            fileset.load()
            if not fileset.open:

                myThread.transaction.begin()
                self.removeManagedFilesets.execute(
                    filesetId = fileset.id, feederType = feederId,
                    conn = myThread.transaction.conn, transaction = True)
                myThread.transaction.commit()

        # Handle old filesets. We update old filesets every 10 minutes.
        # We need to make the old fileset update cycle configurable.
        if ((time.time() / 60) - self.last_poll_time) > 10:

            # WorkQueue handles old filesets
            for name, fileset in self.fileset_watch.items():

                logging.debug("Will poll %s : %s" % (name, fileset.id))
                self.workq.enqueue(name, fileset)

            for key, filesets in self.workq:

                fileset = self.fileset_watch[key]
                logging.debug("the poll key %s result %s is ready!" %
                              (key, str(fileset.id)))

                myThread.transaction.begin()
                feederId = self.getFeederId.execute(
                    feederType = fileset.name.split(":")[1],
                    conn = myThread.transaction.conn, transaction = True)
                myThread.transaction.commit()

                logging.debug("the Feeder %s has processed %s and is "
                              "removing it if closed" % (feederId, fileset.name))

                # If the fileset is closed, remove it from the managed filesets
                fileset.load()
                if not fileset.open:

                    myThread.transaction.begin()
                    self.removeManagedFilesets.execute(
                        filesetId = fileset.id, feederType = feederId,
                        conn = myThread.transaction.conn, transaction = True)
                    myThread.transaction.commit()

            # Update the last update time of old filesets
            self.last_poll_time = time.time()/60
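
The heart of databaseWork above is reconciling the in-memory watch list with the managed filesets returned by the DAO: new names are added, names that are no longer managed are dropped. A stand-alone sketch of that bookkeeping with plain dicts in place of WMBS Fileset objects (names are illustrative):

def reconcileWatchList(filesetWatch, managedFilesets):
    """
    Update filesetWatch in place from the list of managed filesets and
    return the dict of newly added entries.
    """
    seen = {}
    new = {}
    for managed in managedFilesets:
        name = managed['name']
        if name not in filesetWatch:
            filesetWatch[name] = managed
            new[name] = managed
        seen[name] = managed

    # drop filesets that are no longer managed
    for name in list(filesetWatch):
        if name not in seen:
            del filesetWatch[name]

    return new

# usage
watch = {'old:Feeder:RAW:1': {'name': 'old:Feeder:RAW:1'}}
print(reconcileWatchList(watch, [{'name': 'new:Feeder:RAW:1'}]))
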
Пример #9
0
    def databaseWork(self):
        """
        completed, set the fileset to close (Not implemented yet)
        """
        fileset_watch_temp = []
        listFileset = {}
        fileset_new = {}

        myThread = threading.currentThread()
        myThread.transaction.begin()

        # Get All managed filesets
        managedFilesets = self.getAllManagedFilesets.execute(
            conn = myThread.transaction.conn, transaction = True)

        myThread.transaction.commit()

        logging.debug("Found %s managed filesets" % len(managedFilesets))

        for managedFileset in managedFilesets:

            logging.debug("Processing %s %s" %
                          (managedFileset['id'], managedFileset['name']))

            filesetToUpdate = Fileset(id=managedFileset['id'])
            filesetToUpdate.load()

            if managedFileset['name'] not in self.fileset_watch:

                self.fileset_watch[filesetToUpdate.name] = filesetToUpdate
                fileset_new[filesetToUpdate.name] = filesetToUpdate

            listFileset[filesetToUpdate.name] = filesetToUpdate

        # Update the list of the fileset to watch
        for oldFileset in self.fileset_watch:

            if oldFileset not in listFileset:

                fileset_watch_temp.append(oldFileset)
        # Remove from the list of the fileset to update the ones which are not
        # in ManagedFilesets anymore
        for oldTempFileset in fileset_watch_temp:
            del self.fileset_watch[oldTempFileset]

        logging.debug("NEW FILESETS %s" % fileset_new)
        logging.debug("OLD FILESETS %s" % self.fileset_watch)

        # WorkQueue work
        for name, fileset in fileset_new.items():

            logging.debug("Will poll %s : %s" % (name, fileset.id))
            self.workq.enqueue(name, fileset)

        for key, filesets in self.workq:

            fileset = self.fileset_watch[key]
            logging.debug("the poll key %s result %s is ready!" %
                          (key, str(fileset.id)))

            myThread.transaction.begin()

            feederId = self.getFeederId.execute(
                feederType = fileset.name.split(":")[1],
                conn = myThread.transaction.conn, transaction = True)

            myThread.transaction.commit()

            logging.debug("the Feeder %s has processed %s and is "
                          "removing it if closed" % (feederId, fileset.name))

            # If the fileset is closed, remove it from the managed filesets
            fileset.load()
            if not fileset.open:

                myThread.transaction.begin()
                self.removeManagedFilesets.execute(
                    filesetId = fileset.id, feederType = feederId,
                    conn = myThread.transaction.conn, transaction = True)
                myThread.transaction.commit()

        # Handle old filesets. We update old filesets every 10 minutes.
        # We need to make the old fileset update cycle configurable.
        if ((time.time() / 60) - self.last_poll_time) > 10:

            # WorkQueue handles old filesets
            for name, fileset in self.fileset_watch.items():

                logging.debug("Will poll %s : %s" % (name, fileset.id))
                self.workq.enqueue(name, fileset)

            for key, filesets in self.workq:

                fileset = self.fileset_watch[key]
                logging.debug("the poll key %s result %s is ready!" %
                              (key, str(fileset.id)))

                myThread.transaction.begin()
                feederId = self.getFeederId.execute(
                    feederType = fileset.name.split(":")[1],
                    conn = myThread.transaction.conn, transaction = True)
                myThread.transaction.commit()

                logging.debug("the Feeder %s has processed %s and is "
                              "removing it if closed" % (feederId, fileset.name))

                # If the fileset is closed, remove it from the managed filesets
                fileset.load()
                if not fileset.open:

                    myThread.transaction.begin()
                    self.removeManagedFilesets.execute(
                        filesetId = fileset.id, feederType = feederId,
                        conn = myThread.transaction.conn, transaction = True)
                    myThread.transaction.commit()

            # Update the last update time of old filesets
            self.last_poll_time = time.time() / 60
Пример #10
0
    def databaseWork(self):
        """
        Queries the DB for all watched filesets; when a fileset matching a
        workflow becomes available, create the subscription.
        """
        # Get all watched workflows
        availableWorkflows = self.getUnsubscribedWorkflows.execute()
        logging.debug("Found %s unsubscribed managed workflows" \
              % len(availableWorkflows))

        # Get all filesets to check if they match a wrokflow
        availableFilesets = self.getAllFilesets.execute()

        logging.debug("Found %s filesets" % len(availableFilesets))

        # Loop on unsubscribed workflows to match filesets
        for managedWorkflow in availableWorkflows:

            # Workflow object cache to pass into Subscription constructor
            wfObj = None

            for fileset in availableFilesets:

                # Fileset object cache
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWorkflow['fileset_match'], fileset['name']):

                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (fileset['name'], managedWorkflow['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id = fileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id = managedWorkflow['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj,
                                          workflow = wfObj,
                                          #whitelist = whitelist,
                                          #blacklist = blacklist,
                                          split_algo = managedWorkflow['split_algo'],
                                          type = managedWorkflow['type'])
                    newSub.create()

        managedWorkflows = self.getManagedWorkflows.execute()
        logging.debug("Found %s  managed workflows" \
              % len(managedWorkflows))

        unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
        logging.debug("Found %s unsubscribed filesets" % \
                len(unsubscribedFilesets))

        # Loop on unsubscribed filesets to match workflows
        for unsubscribedFileset in unsubscribedFilesets:

            # Workflow object cache to pass into Subscription constructor
            # FIXME
            wfObj = None

            for managedWork in managedWorkflows:

                logging.debug("The workflow %s" %managedWork['workflow'])

                # Fileset object cache
                wfObj = None
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWork['fileset_match'],
                            unsubscribedFileset['name']):
                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (unsubscribedFileset['name'],
                            managedWork['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id = unsubscribedFileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id = managedWork['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj,
                                          workflow = wfObj,
                                          #whitelist = whitelist,
                                          #blacklist = blacklist,
                                          split_algo = managedWork['split_algo'],
                                          type = managedWork['type'])
                    newSub.create()
                    newSub.load()
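
The two passes above reduce to pairing workflows and filesets by a regular-expression match of the workflow's fileset_match pattern against the fileset name. A minimal sketch of that matching step, with plain dicts standing in for the WMBS objects (illustrative only):

import re

def matchWorkflowsToFilesets(workflows, filesets):
    """Return (workflow, fileset) pairs whose fileset name matches the workflow pattern."""
    pairs = []
    for workflow in workflows:
        for fileset in filesets:
            if re.match(workflow['fileset_match'], fileset['name']):
                pairs.append((workflow, fileset))
    return pairs

# usage
workflows = [{'workflow': 1, 'fileset_match': r'.*:T0Feeder:.*'}]
filesets = [{'id': 7, 'name': 'MinBias:T0Feeder:RAW:None'}]
print(matchWorkflowsToFilesets(workflows, filesets))
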
Пример #11
0
    def __call__(self, parameters):
        """
        Perform the work required with the given parameters
        """
        DefaultSlave.__call__(self, parameters)

        # Handle the message
        message = self.messageArgs

        # Lock on the running feeders list
        myThread = threading.currentThread()
        myThread.runningFeedersLock.acquire()

        # Create empty fileset if fileset.name doesn't exist
        filesetName = message["dataset"]
        feederType = message["FeederType"]
        fileType = message["FileType"]
        startRun = message["StartRun"]

        logging.debug("Dataset " + filesetName + " arrived")

        fileset = Fileset(name = filesetName + ':' + feederType + ':' +
                          fileType + ':' + startRun)

        # Check if the fileset is already there
        if fileset.exists() == False:

            # Empty fileset creation
            fileset.create()
            fileset.setLastUpdate(0)

            logging.info("Fileset %s whith id %s is added" \
                               %(fileset.name, str(fileset.id)))

            # Get feeder type
            feederType = message["FeederType"]

            # Check if there is a running feeder
            if feederType in myThread.runningFeeders:
                logging.info("HAVE FEEDER " + feederType + " RUNNING")
                logging.info(myThread.runningFeeders[feederType])

            else:
                logging.info("NO FEEDER " + feederType + " RUNNING")

                # Check if we have a feeder in DB
                if self.queries.checkFeeder(feederType):
                    # Have feeder, get info
                    logging.info("Getting Feeder from DB")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId
                else:
                    # Create feeder
                    logging.info("Adding Feeder to DB")
                    self.queries.addFeeder(feederType, "StatePath")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

            # Fileset/Feeder association
            self.queries.addFilesetToManage(fileset.id,
                                            myThread.runningFeeders[feederType])
            logging.info("Fileset %s is added to feeder %s" %
                         (fileset.id, myThread.runningFeeders[feederType]))
        else:

            # If fileset already exist a new subscription
            # will be created for its workflow
            logging.info("Fileset exists: Subscription will be created for it")

            # Open it if closed
            fileset.load()
            if not fileset.open:

                fileset.markOpen(True)

                logging.info("Getting Feeder from DB")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId

                self.queries.addFilesetToManage(fileset.id,
                                                myThread.runningFeeders[feederType])
                logging.info("Fileset %s is added to feeder %s" %
                             (fileset.id, myThread.runningFeeders[feederType]))

        myThread.runningFeedersLock.release()
        myThread.msgService.finish()
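
The handler above uses a get-or-create pattern for feeders: reuse the cached id if the feeder is already running, otherwise look it up in the database and register it if missing. A compact sketch of that logic with an in-memory cache and a fake query layer (both are assumptions, not the WMCore API):

class FakeFeederQueries(object):
    """Stand-in for self.queries: keeps feeders in a dict instead of a database."""
    def __init__(self):
        self.feeders = {}
    def checkFeeder(self, feederType):
        return feederType in self.feeders
    def addFeeder(self, feederType, statePath):
        self.feeders[feederType] = len(self.feeders) + 1
    def getFeederId(self, feederType):
        return self.feeders[feederType]

def getOrCreateFeeder(runningFeeders, queries, feederType):
    """Return the feeder id, creating the feeder if it is not cached or stored."""
    if feederType in runningFeeders:
        return runningFeeders[feederType]
    if not queries.checkFeeder(feederType):
        queries.addFeeder(feederType, "StatePath")
    feederId = queries.getFeederId(feederType)
    runningFeeders[feederType] = feederId
    return feederId

# usage
print(getOrCreateFeeder({}, FakeFeederQueries(), "T0Feeder"))
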
Пример #12
0
    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName,
                                                       testArguments)

        wmbsHelper = WMBSHelper(workload,
                                'Reco',
                                'SomeBlock',
                                cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask,
                                              wmbsHelper.topLevelFileset)

        self.stateMap = {
            'AlcaSkim': [],
            'Merge': [],
            'Harvesting': [],
            'Processing Done': []
        }
        self.orderedStates = [
            'AlcaSkim', 'Merge', 'Harvesting', 'Processing Done'
        ]

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest(
            {'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = [
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
            '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
            '%s/RecoMergewrite_AOD', '%s/RecoMergewrite_DQM',
            '%s/RecoMergewrite_RECO'
        ]
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO'
        )
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' %
                                          (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' %
                                          (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(
            name=
            '/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM'
        )
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return
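
The merge-task loop above derives each unmerged fileset name from the task path by pure string manipulation: strip the 13-character 'AlcaSkimMerge' prefix for ALCARECO streams, or take the data tier from the task name otherwise. A small sketch of just that derivation (illustrative, mirroring the slicing used above):

def unmergedFilesetName(mergeTask, topLevelTask, alcaSkimTask):
    """Derive the unmerged fileset name for a merge task path."""
    if 'AlcaSkim' in mergeTask:
        # task name looks like 'AlcaSkimMerge<stream>'; drop the 13-char prefix
        stream = mergeTask.split('/')[-1][13:]
        return '%s/unmerged-%sALCARECO' % (alcaSkimTask, stream)
    dataTier = mergeTask.split('/')[-1].split('_')[-1]
    return '%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier)

# usage
top = '/PromptReco_Run195360_Cosmics/Reco'
print(unmergedFilesetName(top + '/RecoMergewrite_AOD', top, top + '/AlcaSkim'))
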
Пример #13
0
    def __call__(self, filesetToProcess):
        """
        The algorithm itself
        """

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"), \
            os.getenv('DIALECT'), os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package = "WMCore.WMBS" , \
              logger = myThread.logger, \
              dbinterface = myThread.dbi)

        lastFileset = daofactory(classname = "Fileset.ListFilesetByTask")
        lastWorkflow = daofactory(classname = "Workflow.LoadFromTask")
        subsRun = daofactory(\
classname = "Subscriptions.LoadFromFilesetWorkflow")
        successJob = daofactory(classname = "Subscriptions.SucceededJobs")
        allJob = daofactory(classname = "Subscriptions.Jobs")
        fileInFileset = daofactory(classname = "Files.InFileset")


        # Get the start Run if asked
        startRun = filesetToProcess.name.split(":")[3]
        logging.debug("the T0Feeder is processing %s" %
                      filesetToProcess.name)
        logging.debug("the fileset name %s" %
                      filesetToProcess.name.split(":")[0])

        fileType = filesetToProcess.name.split(":")[2]
        crabTask = filesetToProcess.name.split(":")[0]
        LASTIME = filesetToProcess.lastUpdate

        tries = 1
        while True:

            try:
                myRequester = JSONRequests(url = "vocms52.cern.ch:8889")
                requestResult = myRequester.get("/tier0/runs")
            except Exception:
                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                else:
                    tries += 1
                    continue

            logging.debug("T0ASTRunChain feeder queries done ...")
            now = time.time()

            break


        for listRun in requestResult[0]:

            if startRun != 'None' and int(listRun['run']) >= int(startRun):
                if listRun['status'] in ('CloseOutExport', 'Complete',
                                         'CloseOutT1Skimming'):

                    crabWorkflow = lastWorkflow.execute(task=crabTask)

                    crabFileset = lastFileset.execute(task=crabTask)

                    crabrunFileset = Fileset(
                        name = crabFileset[0]["name"].split(':')[0].split('-Run')[0] +
                               '-Run' + str(listRun['run']) + ":" +
                               ":".join(crabFileset[0]['name'].split(':')[1:]))

                    if crabrunFileset.exists() > 0:

                        crabrunFileset.load()
                        currSubs = subsRun.execute(crabrunFileset.id,
                                                   crabWorkflow[0]['id'])

                        if currSubs:

                            listsuccessJob = successJob.execute(
                                subscription=currSubs['id'])
                            listallJob = allJob.execute(
                                subscription=currSubs['id'])

                            if len(listsuccessJob) == len(listallJob):

                                for currid in listsuccessJob:
                                    currjob = Job(id = currid)
                                    currjob.load()

                                    logging.debug("Reading FJR %s" % currjob['fwjr_path'])

                                    jobReport = readJobReport(currjob['fwjr_path'])

                                    if len(jobReport) > 0:

                                        if jobReport[0].files:

                                            for newFile in jobReport[0].files:

                                                logging.debug("Output path %s" % newFile['LFN'])
                                                newFileToAdd = File(lfn = newFile['LFN'],
                                                                    locations = 'caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:

                                                    filesetToProcess.addFile(newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                        elif jobReport[0].analysisFiles:

                                            for newFile in jobReport[0].analysisFiles:

                                                logging.debug("Output path %s " % newFile['LFN'])
                                                newFileToAdd = File(lfn = newFile['LFN'],
                                                                    locations = 'caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:

                                                    logging.debug("%s loaded and added by T0ASTRunChain" % newFile['LFN'])
                                                    filesetToProcess.addFile(newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                        else:
                                            break  # Missed fjr - Try next time


        # Commit the fileset
        logging.debug("Test purge in T0ASTRunChain ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate


        # For re-opened or empty filesets, try until the purge time
        if (int(now)/3600 - LASTIME/3600) > self.reopenTime:

            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

        if (int(now)/3600 - LASTIME/3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")
Пример #14
0
    def __call__(self, filesetToProcess):
        """
        The algorithm itself
        """

        # Get configuration
        initObj = WMInit()
        initObj.setLogging()
        initObj.setDatabaseConnection(os.getenv("DATABASE"), \
            os.getenv('DIALECT'), os.getenv("DBSOCK"))

        myThread = threading.currentThread()

        daofactory = DAOFactory(package = "WMCore.WMBS" , \
              logger = myThread.logger, \
              dbinterface = myThread.dbi)

        lastFileset = daofactory(classname="Fileset.ListFilesetByTask")
        lastWorkflow = daofactory(classname="Workflow.LoadFromTask")
        subsRun = daofactory(classname="Subscriptions.LoadFromFilesetWorkflow")
        successJob = daofactory(classname="Subscriptions.SucceededJobs")
        allJob = daofactory(classname="Subscriptions.Jobs")
        fileInFileset = daofactory(classname="Files.InFileset")

        # Get the start Run if asked
        startRun = filesetToProcess.name.split(":")[3]
        logging.debug("the T0Feeder is processing %s" %
                      filesetToProcess.name)
        logging.debug("the fileset name %s" %
                      filesetToProcess.name.split(":")[0])

        fileType = filesetToProcess.name.split(":")[2]
        crabTask = filesetToProcess.name.split(":")[0]
        LASTIME = filesetToProcess.lastUpdate

        tries = 1
        while True:

            try:

                myRequester = JSONRequests(url="vocms52.cern.ch:8889")
                requestResult = myRequester.get("/tier0/runs")

            except Exception:

                logging.debug("T0Reader call error...")
                if tries == self.maxRetries:
                    return
                else:
                    tries += 1
                    continue

            logging.debug("T0ASTRunChain feeder queries done ...")
            now = time.time()

            break

        for listRun in requestResult[0]:

            if startRun != 'None' and int(listRun['run']) >= int(startRun):
                if listRun['status'] in ('CloseOutExport', 'Complete',
                                         'CloseOutT1Skimming'):

                    crabWorkflow = lastWorkflow.execute(task=crabTask)

                    crabFileset = lastFileset.execute(task=crabTask)

                    crabrunFileset = Fileset(
                        name=crabFileset[0]["name"].split(':')[0].split('-Run')[0] +
                             '-Run' + str(listRun['run']) + ":" +
                             ":".join(crabFileset[0]['name'].split(':')[1:]))

                    if crabrunFileset.exists() > 0:

                        crabrunFileset.load()
                        currSubs = subsRun.execute(crabrunFileset.id,
                                                   crabWorkflow[0]['id'])

                        if currSubs:

                            listsuccessJob = successJob.execute(
                                subscription=currSubs['id'])
                            listallJob = allJob.execute(
                                subscription=currSubs['id'])

                            if len(listsuccessJob) == len(listallJob):

                                for currid in listsuccessJob:
                                    currjob = Job(id=currid)
                                    currjob.load()

                                    logging.debug("Reading FJR %s" %
                                                  currjob['fwjr_path'])

                                    jobReport = readJobReport(
                                        currjob['fwjr_path'])

                                    if len(jobReport) > 0:

                                        if jobReport[0].files:

                                            for newFile in jobReport[0].files:

                                                logging.debug("Output path %s" % newFile['LFN'])
                                                newFileToAdd = File(lfn=newFile['LFN'],
                                                                    locations='caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:

                                                    filesetToProcess.addFile(newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                        elif jobReport[0].analysisFiles:

                                            for newFile in jobReport[0].analysisFiles:

                                                logging.debug("Output path %s " % newFile['LFN'])
                                                newFileToAdd = File(lfn=newFile['LFN'],
                                                                    locations='caf.cern.ch')

                                                LOCK.acquire()

                                                if not newFileToAdd.exists():
                                                    newFileToAdd.create()
                                                else:
                                                    newFileToAdd.loadData()

                                                LOCK.release()

                                                listFile = fileInFileset.execute(filesetToProcess.id)
                                                if {'fileid': newFileToAdd['id']} not in listFile:

                                                    logging.debug("%s loaded and added by T0ASTRunChain" % newFile['LFN'])
                                                    filesetToProcess.addFile(newFileToAdd)
                                                    filesetToProcess.setLastUpdate(now)
                                                    filesetToProcess.commit()
                                                    logging.debug("new file created/loaded and added by T0ASTRunChain...")

                                        else:
                                            break  # Missed fjr - Try next time

        # Commit the fileset
        logging.debug("Test purge in T0ASTRunChain ...")
        filesetToProcess.load()
        LASTIME = filesetToProcess.lastUpdate

        # For re-opened or empty filesets, try until the purge time
        if (int(now) / 3600 - LASTIME / 3600) > self.reopenTime:

            filesetToProcess.setLastUpdate(time.time())
            filesetToProcess.commit()

        if (int(now) / 3600 - LASTIME / 3600) > self.purgeTime:

            filesetToProcess.markOpen(False)
            logging.debug("Purge Done...")
Пример #15
0
class ExpressTest(unittest.TestCase):
    """
    _ExpressTest_

    Test for Express job splitter
    """
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'TIME': int(time.time()),
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2]:
            insertLumiDAO.execute(binds={
                'RUN': 1,
                'LUMI': lumi
            },
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Express",
                                          type="Express")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(
            classname="RunLumiCloseout.InsertClosedLumi")
        self.releaseExpressDAO = daoFactory(
            classname="Tier0Feeder.ReleaseExpress")
        self.currentTime = int(time.time())

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def finalCloseLumis(self):
        """
        _finalCloseLumis_

        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""UPDATE lumi_section_closed
                                    SET close_time = 1
                                    """,
                                 transaction=False)

        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()

        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """,
                                           transaction=False)[0].fetchall()

        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test event threshold (single job creation)

        Test that only closed lumis are used

        Test check on express release

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "Express",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': 0
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.finalCloseLumis()

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False)

        jobGroups = jobFactory(maxInputEvents=200)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Express-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test event threshold (multiple job creation)

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "Express",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False)

        jobGroups = jobFactory(maxInputEvents=199)

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Didn't create a single split lumi")

        return

    def test02(self):
        """
        _test02_

        Test multi lumis

        """
        insertClosedLumiBinds = []
        for lumi in [1, 2]:
            filecount = 1
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "Express",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        self.releaseExpressDAO.execute(binds={'RUN': 1}, transaction=False)

        jobGroups = jobFactory(maxInputEvents=100)

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
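
The tests above exercise an event-count threshold: two 100-event files in one lumi fit into a single job at maxInputEvents=200 but split into two jobs at 199. A toy splitter showing only that per-lumi threshold arithmetic (not the actual Express algorithm):

def splitLumiByEvents(fileEventCounts, maxInputEvents):
    """Greedily group files of one lumi into jobs of at most maxInputEvents events."""
    jobs = []
    current, currentEvents = [], 0
    for events in fileEventCounts:
        if current and currentEvents + events > maxInputEvents:
            jobs.append(current)
            current, currentEvents = [], 0
        current.append(events)
        currentEvents += events
    if current:
        jobs.append(current)
    return jobs

# usage: two 100-event files
print(len(splitLumiByEvents([100, 100], 200)))   # 1 job
print(len(splitLumiByEvents([100, 100], 199)))   # 2 jobs
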
Пример #16
0
class JobGroup(WMBSBase, WMJobGroup):
    """
    A group (set) of Jobs
    """
    def __init__(self,
                 subscription=None,
                 jobs=None,
                 id=-1,
                 uid=None,
                 location=None):
        WMBSBase.__init__(self)
        WMJobGroup.__init__(self, subscription=subscription, jobs=jobs)

        self.id = id
        self.lastUpdate = None
        self.uid = uid

        if location is not None:
            self.setSite(location)

        return

    def create(self):
        """
        Add the new jobgroup to WMBS, create the output Fileset object
        """
        myThread = threading.currentThread()
        existingTransaction = self.beginTransaction()

        #overwrite base class self.output for WMBS fileset
        self.output = Fileset(name=makeUUID())
        self.output.create()

        if self.uid is None:
            self.uid = makeUUID()

        action = self.daofactory(classname="JobGroup.New")
        action.execute(self.uid,
                       self.subscription["id"],
                       self.output.id,
                       conn=self.getDBConn(),
                       transaction=self.existingTransaction())

        self.id = self.exists()
        self.commitTransaction(existingTransaction)

        return

    def delete(self):
        """
        Remove a jobgroup from WMBS
        """
        deleteAction = self.daofactory(classname="JobGroup.Delete")
        deleteAction.execute(id=self.id,
                             conn=self.getDBConn(),
                             transaction=self.existingTransaction())

        return

    def exists(self):
        """
        Check whether a jobgroup exists, using the id if set and the uid
        otherwise; return the id.
        """
        if self.id != -1:
            action = self.daofactory(classname="JobGroup.ExistsByID")
            result = action.execute(id=self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())
        else:
            action = self.daofactory(classname="JobGroup.Exists")
            result = action.execute(uid=self.uid,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        return result

    def load(self):
        """
        _load_

        Load all meta data associated with the JobGroup.  This includes the
        JobGroup id, uid, last_update time, subscription id and output fileset
        id.  Either the JobGroup id or uid must be specified for this to work.
        """
        existingTransaction = self.beginTransaction()

        if self.id > 0:
            loadAction = self.daofactory(classname="JobGroup.LoadFromID")
            result = loadAction.execute(self.id,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())
        else:
            loadAction = self.daofactory(classname="JobGroup.LoadFromUID")
            result = loadAction.execute(self.uid,
                                        conn=self.getDBConn(),
                                        transaction=self.existingTransaction())

        self.id = result["id"]
        self.uid = result["uid"]
        self.lastUpdate = result["last_update"]

        self.subscription = Subscription(id=result["subscription"])
        self.subscription.load()

        self.output = Fileset(id=result["output"])
        self.output.load()

        self.jobs = []
        self.commitTransaction(existingTransaction)
        return

    def loadData(self):
        """
        _loadData_

        Load all data that is associated with the jobgroup.  This includes
        loading all the subscription information, the output fileset
        information and all the jobs that are associated with the group.
        """
        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        self.subscription.loadData()
        self.output.loadData()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        self.jobs = []
        self.newjobs = []

        for jobID in result:
            newJob = Job(id=jobID["id"])
            newJob.loadData()
            self.add(newJob)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def commit(self):
        """
        _commit_

        Write any new jobs to the database, creating them in the database if
        necessary.
        """
        existingTransaction = self.beginTransaction()

        if self.id == -1:
            self.create()

        for j in self.newjobs:
            j.create(group=self)

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)
        return

    def setSite(self, site_name=None):
        """
        Updates the jobGroup with a site_name from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.SetSite")
        result = action.execute(site_name=site_name,
                                jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def getSite(self):
        """
        Returns the site_name for the jobGroup from the wmbs_location table
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetSite")
        result = action.execute(jobGroupID=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def listJobIDs(self):
        """
        Returns a list of job IDs
        Useful for times when threading the loading of jobGroups, where running loadData can overload UUID
        """

        existingTransaction = self.beginTransaction()

        if self.id < 0 or self.uid is None:
            self.load()

        loadAction = self.daofactory(classname="JobGroup.LoadJobs")
        result = loadAction.execute(self.id,
                                    conn=self.getDBConn(),
                                    transaction=self.existingTransaction())

        jobIDList = []

        for jobID in result:
            jobIDList.append(jobID["id"])

        self.commitTransaction(existingTransaction)
        return jobIDList

    def commitBulk(self):
        """
        Creates jobs in a group instead of singly, as is done in jobGroup.commit()
        """

        myThread = threading.currentThread()

        if self.id == -1:
            myThread.transaction.begin()
            #existingTransaction = self.beginTransaction()
            self.create()
            #self.commitTransaction(existingTransaction)
            myThread.transaction.commit()

        existingTransaction = self.beginTransaction()

        listOfJobs = []
        for job in self.newjobs:
            #First do all the header stuff
            if job["id"] != None:
                continue

            job["jobgroup"] = self.id

            if job["name"] == None:
                job["name"] = makeUUID()

            listOfJobs.append(job)

        bulkAction = self.daofactory(classname="Jobs.New")
        result = bulkAction.execute(jobList=listOfJobs)

        #Use the results of the bulk commit to get the jobIDs
        fileDict = {}
        for job in listOfJobs:
            job['id'] = result[job['name']]
            fileDict[job['id']] = []
            for inputFile in job['input_files']:
                fileDict[job['id']].append(inputFile['id'])

        maskAction = self.daofactory(classname="Masks.New")
        maskAction.execute(jobList=listOfJobs, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        fileAction = self.daofactory(classname="Jobs.AddFiles")
        fileAction.execute(jobDict=fileDict, conn=self.getDBConn(),
                           transaction=self.existingTransaction())

        WMJobGroup.commit(self)
        self.commitTransaction(existingTransaction)

        return

    def getLocationsForJobs(self):
        """
        Gets a list of the locations that jobs can run at
        """
        if not self.exists():
            return

        action = self.daofactory(classname="JobGroup.GetLocationsForJobs")
        result = action.execute(id=self.id,
                                conn=self.getDBConn(),
                                transaction=self.existingTransaction())

        return result

    def __str__(self):
        """
        __str__

        Print out some information about the jobGroup
        as if jobGroup inherited from dict()
        """

        d = {
            'id': self.id,
            'uid': self.uid,
            'subscription': self.subscription,
            'output': self.output,
            'jobs': self.jobs,
            'newjobs': self.newjobs
        }

        return str(d)
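
commitBulk above names any unnamed jobs, inserts them in one DAO call and then uses the returned name-to-id map to build the job-to-input-file mapping. A stripped-down sketch of that bookkeeping with plain dicts; the insertedIds argument and uuid-based naming are assumptions, not the WMBS API:

import uuid

def prepareBulkCommit(newJobs, jobGroupId, insertedIds):
    """
    Assign group/name to unsaved jobs and build the job-id -> input-file-id map,
    mirroring the bookkeeping in commitBulk above. insertedIds maps job name -> id.
    """
    fileDict = {}
    for job in newJobs:
        if job.get("id") is not None:
            continue
        job["jobgroup"] = jobGroupId
        if job.get("name") is None:
            job["name"] = str(uuid.uuid4())
        job["id"] = insertedIds[job["name"]]
        fileDict[job["id"]] = [f["id"] for f in job["input_files"]]
    return fileDict

# usage
jobs = [{"id": None, "name": "job-a", "input_files": [{"id": 11}, {"id": 12}]}]
print(prepareBulkCommit(jobs, jobGroupId=5, insertedIds={"job-a": 77}))
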
Пример #17
0
    def setupPromptRecoWorkflow(self):
        """
        _setupPromptRecoWorkflow_

        Populate WMBS with a real PromptReco workflow,
        every subscription must be unfinished at first
        """

        # Populate disk and WMBS
        testArguments = PromptRecoWorkloadFactory.getTestArguments()

        workflowName = 'PromptReco_Run195360_Cosmics'
        factory = PromptRecoWorkloadFactory()
        testArguments["EnableHarvesting"] = True
        testArguments["CouchURL"] = os.environ["COUCHURL"]
        workload = factory.factoryWorkloadConstruction(workflowName, testArguments)

        wmbsHelper = WMBSHelper(workload, 'Reco', 'SomeBlock', cachepath=self.testDir)
        wmbsHelper.createTopLevelFileset()
        wmbsHelper._createSubscriptionsInWMBS(wmbsHelper.topLevelTask, wmbsHelper.topLevelFileset)

        self.stateMap = {'AlcaSkim': [],
                         'Merge': [],
                         'Harvesting': [],
                         'Processing Done': []}
        self.orderedStates = ['AlcaSkim', 'Merge', 'Harvesting', 'Processing Done']

        # Populate WMStats
        self.requestDBWriter.insertGenericRequest({'RequestName': workflowName})
        self.requestDBWriter.updateRequestStatus(workflowName, 'Closed')

        topLevelTask = '/%s/Reco' % workflowName
        alcaSkimTask = '%s/AlcaSkim' % topLevelTask
        mergeTasks = ['%s/AlcaSkim/AlcaSkimMergeALCARECOStreamHcalCalHOCosmics',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T',
                      '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamMuAlGlobalCosmics',
                      '%s/RecoMergewrite_AOD',
                      '%s/RecoMergewrite_DQM',
                      '%s/RecoMergewrite_RECO']
        harvestingTask = '%s/RecoMergewrite_DQM/RecoMergewrite_DQMEndOfRunDQMHarvestMerged' % topLevelTask

        self.stateMap['AlcaSkim'].append(wmbsHelper.topLevelSubscription)

        alcaSkimWorkflow = Workflow(name=workflowName, task=alcaSkimTask)
        alcaSkimWorkflow.load()
        alcarecoFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/unmerged-write_ALCARECOALCARECO')
        alcarecoFileset.load()
        alcaSkimSub = Subscription(alcarecoFileset, alcaSkimWorkflow)
        alcaSkimSub.load()
        self.stateMap['Merge'].append(alcaSkimSub)

        for task in mergeTasks:
            mergeTask = task % topLevelTask
            mergeWorkflow = Workflow(name=workflowName, task=mergeTask)
            mergeWorkflow.load()
            if 'AlcaSkim' in mergeTask:
                stream = mergeTask.split('/')[-1][13:]
                unmergedFileset = Fileset(name='%s/unmerged-%sALCARECO' % (alcaSkimTask, stream))
                unmergedFileset.load()
            else:
                dataTier = mergeTask.split('/')[-1].split('_')[-1]
                unmergedFileset = Fileset(name='%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))
                unmergedFileset.load()
            mergeSub = Subscription(unmergedFileset, mergeWorkflow)
            mergeSub.load()
            self.stateMap['Harvesting'].append(mergeSub)

        harvestingWorkflow = Workflow(name=workflowName, task=harvestingTask)
        harvestingWorkflow.load()
        harvestingFileset = Fileset(name='/PromptReco_Run195360_Cosmics/Reco/RecoMergewrite_DQM/merged-MergedDQM')
        harvestingFileset.load()
        harvestingSub = Subscription(harvestingFileset, harvestingWorkflow)
        harvestingSub.load()
        self.stateMap['Processing Done'].append(harvestingSub)

        return
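
The merge-task handling above relies on string templating and slicing of the task path; a small standalone sketch of just that logic, reusing names from the snippet and touching no WMBS objects:

workflowName = 'PromptReco_Run195360_Cosmics'
topLevelTask = '/%s/Reco' % workflowName

# AlcaSkim merge tasks: [13:] strips the 'AlcaSkimMerge' prefix to recover the stream name
mergeTask = '%s/AlcaSkim/AlcaSkimMergeALCARECOStreamTkAlCosmics0T' % topLevelTask
stream = mergeTask.split('/')[-1][13:]              # 'ALCARECOStreamTkAlCosmics0T'
print('%s/AlcaSkim/unmerged-%sALCARECO' % (topLevelTask, stream))

# Reco merge tasks: the data tier is the last '_'-separated token of the task name
mergeTask = '%s/RecoMergewrite_AOD' % topLevelTask
dataTier = mergeTask.split('/')[-1].split('_')[-1]  # 'AOD'
print('%s/unmerged-write_%s%s' % (topLevelTask, dataTier, dataTier))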
Example #18
    def __call__(self, parameters):
        """
        Perform the work required with the given parameters
        """
        DefaultSlave.__call__(self, parameters)

        # Handle the message
        message = self.messageArgs

        # Lock on the running feeders list
        myThread = threading.currentThread()
        myThread.runningFeedersLock.acquire()

        # Create empty fileset if fileset.name doesn't exist
        filesetName = message["dataset"]
        feederType = message["FeederType"]
        fileType = message["FileType"]
        startRun = message["StartRun"]

        logging.debug("Dataset " + filesetName + " arrived")

        fileset = Fileset(name = filesetName+':'\
          +feederType+':'+fileType+':'+startRun)

        # Check if the fileset is already there
        if fileset.exists() == False:

            # Empty fileset creation
            fileset.create()
            fileset.setLastUpdate(0)

            logging.info("Fileset %s whith id %s is added" \
                               %(fileset.name, str(fileset.id)))

            # Get feeder type
            feederType = message["FeederType"]

            # Check if there is a running feeder
            if feederType in myThread.runningFeeders:
                logging.info("HAVE FEEDER " + feederType + " RUNNING")
                logging.info(myThread.runningFeeders[feederType])

            else:
                logging.info("NO FEEDER " + feederType + " RUNNING")

                # Check if we have a feeder in DB
                if self.queries.checkFeeder(feederType):
                    # Have feeder, get info
                    logging.info("Getting Feeder from DB")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId
                else:
                    # Create feeder
                    logging.info("Adding Feeder to DB")
                    self.queries.addFeeder(feederType, "StatePath")
                    feederId = self.queries.getFeederId(feederType)
                    logging.info(feederId)
                    myThread.runningFeeders[feederType] = feederId

            # Fileset/Feeder association
            self.queries.addFilesetToManage(fileset.id, \
                          myThread.runningFeeders[feederType])
            logging.info("Fileset %s is added to feeder %s" %(fileset.id, \
                          myThread.runningFeeders[feederType]))
        else:

            # If the fileset already exists, a new subscription
            # will be created for its workflow
            logging.info("Fileset exists: Subscription will be created for it")

            # Open it if closed
            fileset.load()
            if not fileset.open:

                fileset.markOpen(True)

                logging.info("Getting Feeder from DB")
                feederId = self.queries.getFeederId(feederType)
                logging.info(feederId)
                myThread.runningFeeders[feederType] = feederId

                self.queries.addFilesetToManage(fileset.id, \
                                  myThread.runningFeeders[feederType])
                logging.info("Fileset %s is added to feeder %s" %(fileset.id, \
                                  myThread.runningFeeders[feederType]))

        myThread.runningFeedersLock.release()
        myThread.msgService.finish()
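
A sketch of the message this slave handles and of the fileset name it builds from it; the values below are hypothetical, only the keys and the name layout come from the code above:

# Hypothetical message payload (keys as read by the slave above)
message = {"dataset": "/MinBias/Run2010A-v1/RAW",
           "FeederType": "DBS",
           "FileType": "RAW",
           "StartRun": "138937"}

filesetName = "%s:%s:%s:%s" % (message["dataset"], message["FeederType"],
                               message["FileType"], message["StartRun"])
print(filesetName)  # '/MinBias/Run2010A-v1/RAW:DBS:RAW:138937' -> Fileset(name=...)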
Example #19
class ExpressTest(unittest.TestCase):
    """
    _ExpressTest_

    Test for Express job splitter
    """

    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """, transaction = False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """, transaction = False)


        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in [1, 2]:
            insertLumiDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        self.fileset1 = Fileset(name = "TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow1.create()

        self.subscription1  = Subscription(fileset = self.fileset1,
                                           workflow = workflow1,
                                           split_algo = "Express",
                                           type = "Express")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(classname = "RunLumiCloseout.InsertClosedLumi")
        self.releaseExpressDAO = daoFactory(classname = "Tier0Feeder.ReleaseExpress")
        self.currentTime = int(time.time())

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def finalCloseLumis(self):
        """
        _finalCloseLumis_

        """
        myThread = threading.currentThread()

        myThread.dbi.processData("""UPDATE lumi_section_closed
                                    SET close_time = 1
                                    """, transaction = False)

        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()

        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """, transaction = False)[0].fetchall()

        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test event threshold (single job creation)

        Test that only closed lumis are used

        Test check on express release

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "Express",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : 0 } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.finalCloseLumis()

        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 200, maxInputRate = 23000)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Express-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test event threshold (multiple job creation)

        """
        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "Express",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 199, maxInputRate = 23000)

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Didn't create a single split lumi")

        return

    def test02(self):
        """
        _test02_

        Test multi lumis

        """
        insertClosedLumiBinds = []
        for lumi in [1, 2]:
            filecount = 1
            for i in range(filecount):
                newFile = File(makeUUID(), size = 1000, events = 100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomePNN", immediateSave = False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append( { 'RUN' : 1,
                                                'LUMI' : lumi,
                                                'STREAM' : "Express",
                                                'FILECOUNT' : filecount,
                                                'INSERT_TIME' : self.currentTime,
                                                'CLOSE_TIME' : self.currentTime } )
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package = "WMCore.WMBS",
                                          subscription = self.subscription1)

        self.insertClosedLumiDAO.execute(binds = insertClosedLumiBinds,
                                         transaction = False)

        self.releaseExpressDAO.execute(binds = { 'RUN' : 1 }, transaction = False)

        jobGroups = jobFactory(maxInputEvents = 100, maxInputRate = 23000)

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return
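
The event-threshold behaviour in test00/test01 above reduces to simple arithmetic on the injected files (two files of 100 events in one lumi); one reading of the numbers, consistent with the assertions:

# Worked numbers behind test00/test01 (no WMBS needed)
filesPerLumi = 2
eventsPerFile = 100
totalEvents = filesPerLumi * eventsPerFile        # 200

# test00: maxInputEvents=200 -> the lumi fits into a single job, no split lumi recorded
# test01: maxInputEvents=199 -> the lumi is split into two jobs and one entry
#         appears in lumi_section_split_active
print(totalEvents <= 200, totalEvents <= 199)     # True False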
Example #20
    def databaseWork(self):
        """
        Query the DB for all watched filesets; if a fileset matching a managed
        workflow becomes available, create the subscriptions
        """
        # Get all watched workflows
        availableWorkflows = self.getUnsubscribedWorkflows.execute()
        logging.debug("Found %s unsubscribed managed workflows" \
              % len(availableWorkflows))

        # Get all filesets to check if they match a workflow
        availableFilesets = self.getAllFilesets.execute()

        logging.debug("Found %s filesets" % len(availableFilesets))

        # Loop on unsubscribed workflows to match filesets
        for managedWorkflow in availableWorkflows:

            # Workflow object cache to pass into Subscription constructor
            wfObj = None

            for fileset in availableFilesets:

                # Fileset object cache
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWorkflow['fileset_match'], fileset['name']):

                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (fileset['name'], managedWorkflow['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id=fileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id=managedWorkflow['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj,
                                     workflow = wfObj,
                                     #whitelist = whitelist,
                                     #blacklist = blacklist,
                                     split_algo = managedWorkflow['split_algo'],
                                     type = managedWorkflow['type'])
                    newSub.create()

        managedWorkflows = self.getManagedWorkflows.execute()
        logging.debug("Found %s  managed workflows" \
              % len(managedWorkflows))

        unsubscribedFilesets = self.getUnsubscribedFilesets.execute()
        logging.debug("Found %s unsubscribed filesets" % \
                len(unsubscribedFilesets))

        # Loop on unsubscribed filesets to match workflows
        for unsubscribedFileset in unsubscribedFilesets:

            # Workflow object cache to pass into Subscription constructor
            # FIXME
            wfObj = None

            for managedWork in managedWorkflows:

                logging.debug("The workflow %s" % managedWork['workflow'])

                # Fileset object cache
                wfObj = None
                fsObj = None

                # Load the location information
                #whitelist = Set()
                #blacklist = Set()
                # Location is only caf
                #locations = self.queries.getLocations(managedWorkflow['id'])
                #for location in locations:
                #    if bool(int(location['valid'])) == True:
                #        whitelist.add(location['site_name'])
                #    else:
                #        blacklist.add(location['site_name'])

                # Attempt to match workflows to filesets
                if re.match(managedWork['fileset_match'], \
                     unsubscribedFileset['name']):
                    # Log in debug
                    msg = "Creating subscription for %s to workflow id %s"
                    msg %= (unsubscribedFileset['name'], \
                          managedWork['workflow'])
                    logging.debug(msg)

                    # Match found - Load the fileset if not already loaded
                    if not fsObj:
                        fsObj = Fileset(id=unsubscribedFileset['id'])
                        fsObj.load()

                    # Load the workflow if not already loaded
                    if not wfObj:
                        wfObj = Workflow(id=managedWork['workflow'])
                        wfObj.load()

                    # Create the subscription
                    newSub = Subscription(fileset = fsObj,
                                     workflow = wfObj,
                                     #whitelist = whitelist,
                                     #blacklist = blacklist,
                                     split_algo = managedWork['split_algo'],
                                     type = managedWork['type'])
                    newSub.create()
                    newSub.load()
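
The matching above is a plain regular-expression test of fileset_match against the fileset name; a standalone sketch with hypothetical values (only the dictionary keys come from the code):

import re

managedWorkflow = {'workflow': 7,                      # hypothetical workflow id
                   'fileset_match': r'.*:DBS:RAW:.*',  # hypothetical pattern
                   'split_algo': 'FileBased',
                   'type': 'Processing'}
fileset = {'id': 42, 'name': '/MinBias/Run2010A-v1/RAW:DBS:RAW:138937'}

if re.match(managedWorkflow['fileset_match'], fileset['name']):
    print("Creating subscription for %s to workflow id %s"
          % (fileset['name'], managedWorkflow['workflow']))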
Example #21
    def setUp(self):
        """
        _setUp_
        """
        import WMQuality.TestInit
        WMQuality.TestInit.deleteDatabaseAfterEveryTest("I'm Serious")

        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(
            customModules=["WMComponent.DBS3Buffer", "T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state, state_time)
                                    VALUES (1, 'SomeSite', 1, 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_pnns
                                    (id, pnn)
                                    VALUES (2, 'SomePNN')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 2)
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in range(1, 5):
            insertLumiDAO.execute(binds={
                'RUN': 1,
                'LUMI': lumi
            },
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "Express"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        fileset1 = Fileset(name="TestFileset1")
        self.fileset2 = Fileset(name="TestFileset2")
        fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow2 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow2",
                             task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1 = Subscription(fileset=fileset1,
                                          workflow=workflow1,
                                          split_algo="Express",
                                          type="Express")
        self.subscription2 = Subscription(fileset=self.fileset2,
                                          workflow=workflow2,
                                          split_algo="ExpressMerge",
                                          type="ExpressMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction=False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(
            classname="JobSplitting.InsertSplitLumis")

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
        self.splitArgs['maxInputFiles'] = 500
        self.splitArgs['maxLatency'] = 15 * 23

        return
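
The split parameters collected above are later handed to the splitter as keyword arguments; a test-method sketch of that pattern, mirroring the other test classes in this collection and assuming the ExpressMerge subscription (self.subscription2) is the one being split:

    def test00(self):
        """
        _test00_

        Sketch: hand the default thresholds to the splitter, overriding one for this test
        """
        mySplitArgs = self.splitArgs.copy()
        mySplitArgs['maxInputFiles'] = 5     # hypothetical override of one threshold
        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription2)
        jobGroups = jobFactory(**mySplitArgs)
        return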
Example #22
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules = ["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package = "T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package = "T0.WMBS",
                                logger = logging,
                                dbinterface = myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """, transaction = False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN')
                                    """, transaction = False)
        
        myThread.dbi.processData("""INSERT INTO wmbs_location_pnns
                                    (location, pnn)
                                    VALUES (1, 'SomePNN2')
                                    """, transaction = False)

        insertRunDAO = daoFactory(classname = "RunConfig.InsertRun")
        insertRunDAO.execute(binds = { 'RUN' : 1,
                                       'TIME' : int(time.time()),
                                       'HLTKEY' : "someHLTKey" },
                             transaction = False)

        insertLumiDAO = daoFactory(classname = "RunConfig.InsertLumiSection")
        for lumi in range(1, 5):
            insertLumiDAO.execute(binds = { 'RUN' : 1,
                                            'LUMI' : lumi },
                                  transaction = False)

        insertStreamDAO = daoFactory(classname = "RunConfig.InsertStream")
        insertStreamDAO.execute(binds = { 'STREAM' : "Express" },
                                transaction = False)

        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "Express", "TestFileset1")

        fileset1 = Fileset(name = "TestFileset1")
        self.fileset2 = Fileset(name = "TestFileset2")
        fileset1.load()
        self.fileset2.create()

        workflow1 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow1", task="Test")
        workflow2 = Workflow(spec = "spec.xml", owner = "hufnagel", name = "TestWorkflow2", task="Test")
        workflow1.create()
        workflow2.create()

        self.subscription1  = Subscription(fileset = fileset1,
                                           workflow = workflow1,
                                           split_algo = "Express",
                                           type = "Express")
        self.subscription2  = Subscription(fileset = self.fileset2,
                                           workflow = workflow2,
                                           split_algo = "ExpressMerge",
                                           type = "ExpressMerge")
        self.subscription1.create()
        self.subscription2.create()

        myThread.dbi.processData("""INSERT INTO wmbs_workflow_output
                                    (WORKFLOW_ID, OUTPUT_IDENTIFIER, OUTPUT_FILESET)
                                    VALUES (%d, 'SOMEOUTPUT', %d)
                                    """ % (workflow1.id, self.fileset2.id),
                                 transaction = False)

        # keep for later
        self.insertSplitLumisDAO = daoFactory(classname = "JobSplitting.InsertSplitLumis")

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxInputSize'] = 2 * 1024 * 1024 * 1024
        self.splitArgs['maxInputFiles'] = 500
        self.splitArgs['maxLatency'] = 15 * 23

        return
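
Neither this setUp() nor the one in the previous example is shown with a matching tearDown(); in the other test classes of this collection it is simply the following, which would presumably apply here as well:

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return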
Example #23
class RepackTest(unittest.TestCase):
    """
    _RepackTest_

    Test for Repack job splitter
    """
    def setUp(self):
        """
        _setUp_

        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()

        self.testInit.setSchema(customModules=["T0.WMBS"])

        self.splitterFactory = SplitterFactory(package="T0.JobSplitting")

        myThread = threading.currentThread()
        daoFactory = DAOFactory(package="T0.WMBS",
                                logger=logging,
                                dbinterface=myThread.dbi)

        myThread.dbi.processData("""INSERT INTO wmbs_location
                                    (id, site_name, state)
                                    VALUES (1, 'SomeSite', 1)
                                    """,
                                 transaction=False)
        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE')
                                    """,
                                 transaction=False)

        myThread.dbi.processData("""INSERT INTO wmbs_location_senames
                                    (location, se_name)
                                    VALUES (1, 'SomeSE2')
                                    """,
                                 transaction=False)

        insertRunDAO = daoFactory(classname="RunConfig.InsertRun")
        insertRunDAO.execute(binds={
            'RUN': 1,
            'TIME': int(time.time()),
            'HLTKEY': "someHLTKey"
        },
                             transaction=False)

        insertLumiDAO = daoFactory(classname="RunConfig.InsertLumiSection")
        for lumi in [1, 2, 3, 4]:
            insertLumiDAO.execute(binds={
                'RUN': 1,
                'LUMI': lumi
            },
                                  transaction=False)

        insertStreamDAO = daoFactory(classname="RunConfig.InsertStream")
        insertStreamDAO.execute(binds={'STREAM': "A"}, transaction=False)

        insertStreamFilesetDAO = daoFactory(
            classname="RunConfig.InsertStreamFileset")
        insertStreamFilesetDAO.execute(1, "A", "TestFileset1")

        self.fileset1 = Fileset(name="TestFileset1")
        self.fileset1.load()

        workflow1 = Workflow(spec="spec.xml",
                             owner="hufnagel",
                             name="TestWorkflow1",
                             task="Test")
        workflow1.create()

        self.subscription1 = Subscription(fileset=self.fileset1,
                                          workflow=workflow1,
                                          split_algo="Repack",
                                          type="Repack")
        self.subscription1.create()

        # keep for later
        self.insertClosedLumiDAO = daoFactory(
            classname="RunLumiCloseout.InsertClosedLumi")
        self.currentTime = int(time.time())

        # default split parameters
        self.splitArgs = {}
        self.splitArgs['maxSizeSingleLumi'] = 20 * 1024 * 1024 * 1024
        self.splitArgs['maxSizeMultiLumi'] = 10 * 1024 * 1024 * 1024
        self.splitArgs['maxInputEvents'] = 500000
        self.splitArgs['maxInputFiles'] = 1000

        return

    def tearDown(self):
        """
        _tearDown_

        """
        self.testInit.clearDatabase()

        return

    def getNumActiveSplitLumis(self):
        """
        _getNumActiveSplitLumis_

        helper function that counts the number of active split lumis
        """
        myThread = threading.currentThread()

        results = myThread.dbi.processData("""SELECT COUNT(*)
                                              FROM lumi_section_split_active
                                              """,
                                           transaction=False)[0].fetchall()

        return results[0][0]

    def test00(self):
        """
        _test00_

        Test that the job name prefix feature works
        Test multi lumi size threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)

        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        mySplitArgs['maxSizeMultiLumi'] = self.splitArgs['maxSizeMultiLumi']
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeMultiLumi'] = 5000
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertTrue(job['name'].startswith("Repack-"),
                        "ERROR: Job has wrong name")

        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test01(self):
        """
        _test01_

        Test multi lumi event threshold
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test02(self):
        """
        _test02_

        Test single lumi size threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxSizeSingleLumi'] = 6500
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test03(self):
        """
        _test03_

        Test single lumi event threshold
        Single lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1]:
            filecount = 8
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputEvents'] = 650
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 2,
                         "ERROR: JobFactory didn't create two jobs")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 6,
                         "ERROR: Job does not process 6 files")

        job = jobGroups[0].jobs[1]
        self.assertEqual(len(job.getFiles()), 2,
                         "ERROR: Job does not process 2 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 1,
                         "ERROR: Split lumis were not created")

        return

    def test04(self):
        """
        _test04_

        Test streamer count threshold (only multi lumi)
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.fileset1.markOpen(False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create a single job")

        job = jobGroups[0].jobs[0]
        self.assertEqual(len(job.getFiles()), 4,
                         "ERROR: Job does not process 4 files")

        self.assertEqual(self.getNumActiveSplitLumis(), 0,
                         "ERROR: Split lumis were created")

        return

    def test05(self):
        """
        _test05_

        Test repacking of multiple lumis with holes in the lumi sequence
        Multi lumi input

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 4]:
            filecount = 2
            for i in range(filecount):
                newFile = File(makeUUID(), size=1000, events=100)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputFiles'] = 5
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 0,
                         "ERROR: JobFactory should have returned no JobGroup")

        self.insertClosedLumiDAO.execute(binds={
            'RUN': 1,
            'LUMI': 3,
            'STREAM': "A",
            'FILECOUNT': 0,
            'INSERT_TIME': self.currentTime,
            'CLOSE_TIME': self.currentTime
        },
                                         transaction=False)

        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 1,
                         "ERROR: JobFactory didn't create one job")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        return

    def test06(self):
        """
        _test06_

        Test repacking of 3 lumis
        2 small lumis (single job), followed by a big one (multiple jobs)

        files for lumi 1 and 2 are below multi-lumi thresholds
        files for lumi 3 are above single-lumi threshold

        """
        mySplitArgs = self.splitArgs.copy()

        insertClosedLumiBinds = []
        for lumi in [1, 2, 3]:
            filecount = 2
            for i in range(filecount):
                if lumi == 3:
                    nevents = 500
                else:
                    nevents = 100
                newFile = File(makeUUID(), size=1000, events=nevents)
                newFile.addRun(Run(1, *[lumi]))
                newFile.setLocation("SomeSE", immediateSave=False)
                newFile.create()
                self.fileset1.addFile(newFile)
                insertClosedLumiBinds.append({
                    'RUN': 1,
                    'LUMI': lumi,
                    'STREAM': "A",
                    'FILECOUNT': filecount,
                    'INSERT_TIME': self.currentTime,
                    'CLOSE_TIME': self.currentTime
                })
        self.fileset1.commit()

        jobFactory = self.splitterFactory(package="WMCore.WMBS",
                                          subscription=self.subscription1)

        self.insertClosedLumiDAO.execute(binds=insertClosedLumiBinds,
                                         transaction=False)

        mySplitArgs['maxInputEvents'] = 900
        jobGroups = jobFactory(**mySplitArgs)

        self.assertEqual(len(jobGroups), 1,
                         "ERROR: JobFactory didn't return one JobGroup")

        self.assertEqual(len(jobGroups[0].jobs), 3,
                         "ERROR: JobFactory didn't create three jobs")

        self.assertEqual(len(jobGroups[0].jobs[0].getFiles()), 4,
                         "ERROR: first job does not process 4 files")

        self.assertEqual(len(jobGroups[0].jobs[1].getFiles()), 1,
                         "ERROR: second job does not process 1 file")

        self.assertEqual(len(jobGroups[0].jobs[2].getFiles()), 1,
                         "ERROR: third job does not process 1 file")

        return
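
The single-lumi splits in test02/test03 above follow directly from the thresholds and the 8 injected files (1000 bytes / 100 events each); one reading of the arithmetic, consistent with the assertions:

# Worked numbers behind test02/test03
sizePerFile, eventsPerFile = 1000, 100

# test02: maxSizeSingleLumi=6500 -> 6 files (6000 bytes) fit, a 7th (7000) would not,
#         so the 8-file lumi becomes a 6-file job plus a 2-file job
# test03: maxInputEvents=650     -> 6 files (600 events) fit, a 7th (700) would not,
#         giving the same 6 + 2 split and one active split lumi
print(6 * sizePerFile, 7 * sizePerFile)        # 6000 7000
print(6 * eventsPerFile, 7 * eventsPerFile)    # 600 700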
Example #24
def configureRunStream(tier0Config, run, stream, specDirectory, dqmUploadProxy):
    """
    _configureRunStream_

    Called by Tier0Feeder for new run/streams.

    Retrieve global run settings and build the part
    of the configuration relevant to run/stream
    and write it to the database.

    Create workflows, filesets and subscriptions for
    the processing of runs/streams.

    """
    logging.debug("configureRunStream() : %d , %s" % (run, stream))
    myThread = threading.currentThread()

    daoFactory = DAOFactory(package = "T0.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)

    # retrieve some basic run information
    getRunInfoDAO = daoFactory(classname = "RunConfig.GetRunInfo")
    runInfo = getRunInfoDAO.execute(run, transaction = False)[0]

    #
    # treat centralDAQ or miniDAQ runs (which have an HLT key) differently from local runs
    #
    if runInfo['hltkey'] is not None:

        # streams not explicitly configured are repacked
        if stream not in tier0Config.Streams.dictionary_():
            addRepackConfig(tier0Config, stream)

        streamConfig = tier0Config.Streams.dictionary_()[stream]

        # write stream/dataset mapping (for special express and error datasets)
        insertDatasetDAO = daoFactory(classname = "RunConfig.InsertPrimaryDataset")
        insertStreamDatasetDAO = daoFactory(classname = "RunConfig.InsertStreamDataset")

        # write stream configuration
        insertStreamStyleDAO = daoFactory(classname = "RunConfig.InsertStreamStyle")
        insertRepackConfigDAO = daoFactory(classname = "RunConfig.InsertRepackConfig")
        insertPromptCalibrationDAO = daoFactory(classname = "RunConfig.InsertPromptCalibration")
        insertExpressConfigDAO = daoFactory(classname = "RunConfig.InsertExpressConfig")
        insertSpecialDatasetDAO = daoFactory(classname = "RunConfig.InsertSpecialDataset")
        insertDatasetScenarioDAO = daoFactory(classname = "RunConfig.InsertDatasetScenario")
        insertCMSSWVersionDAO = daoFactory(classname = "RunConfig.InsertCMSSWVersion")
        updateStreamOverrideDAO = daoFactory(classname = "RunConfig.UpdateStreamOverride")
        insertStreamFilesetDAO = daoFactory(classname = "RunConfig.InsertStreamFileset")
        insertRecoReleaseConfigDAO = daoFactory(classname = "RunConfig.InsertRecoReleaseConfig")
        insertWorkflowMonitoringDAO = daoFactory(classname = "RunConfig.InsertWorkflowMonitoring")
        insertStorageNodeDAO = daoFactory(classname = "RunConfig.InsertStorageNode")
        insertPhEDExConfigDAO = daoFactory(classname = "RunConfig.InsertPhEDExConfig")

        bindsDataset = []
        bindsStreamDataset = []
        bindsStreamStyle = {'RUN' : run,
                            'STREAM' : stream,
                            'STYLE': streamConfig.ProcessingStyle }
        bindsRepackConfig = {}
        bindsPromptCalibration = {}
        bindsExpressConfig = {}
        bindsSpecialDataset = {}
        bindsDatasetScenario = []
        bindsCMSSWVersion = []
        bindsStreamOverride = {}
        bindsStorageNode = []
        bindsPhEDExConfig = []

        # mark workflows as injected
        wmbsDaoFactory = DAOFactory(package = "WMCore.WMBS",
                                    logger = logging,
                                    dbinterface = myThread.dbi)
        markWorkflowsInjectedDAO = wmbsDaoFactory(classname = "Workflow.MarkInjectedWorkflows")

        #
        # for spec creation, details for all outputs
        #
        outputModuleDetails = []

        #
        # for PromptReco delay settings
        #
        promptRecoDelay = {}
        promptRecoDelayOffset = {}

        #
        # for PhEDEx subscription settings
        #
        subscriptions = { 'Express' : [],
                          'Bulk' : [] }

        # some hardcoded PhEDEx defaults
        expressPhEDExInjectNode = "T2_CH_CERN"
        expressPhEDExSubscribeNode = "T2_CH_CERN"

        #
        # first take care of all stream settings
        #
        getStreamOnlineVersionDAO = daoFactory(classname = "RunConfig.GetStreamOnlineVersion")
        onlineVersion = getStreamOnlineVersionDAO.execute(run, stream, transaction = False)

        if streamConfig.ProcessingStyle == "Bulk":

            bindsRepackConfig = { 'RUN' : run,
                                  'STREAM' : stream,
                                  'PROC_VER': streamConfig.Repack.ProcessingVersion,
                                  'MAX_SIZE_SINGLE_LUMI' : streamConfig.Repack.MaxSizeSingleLumi,
                                  'MAX_SIZE_MULTI_LUMI' : streamConfig.Repack.MaxSizeMultiLumi,
                                  'MIN_SIZE' : streamConfig.Repack.MinInputSize,
                                  'MAX_SIZE' : streamConfig.Repack.MaxInputSize,
                                  'MAX_EDM_SIZE' : streamConfig.Repack.MaxEdmSize,
                                  'MAX_OVER_SIZE' : streamConfig.Repack.MaxOverSize,
                                  'MAX_EVENTS' : streamConfig.Repack.MaxInputEvents,
                                  'MAX_FILES' : streamConfig.Repack.MaxInputFiles }

        elif streamConfig.ProcessingStyle == "Express":

            specialDataset = "Stream%s" % stream
            bindsDataset.append( { 'PRIMDS' : specialDataset } )
            bindsStreamDataset.append( { 'RUN' : run,
                                         'PRIMDS' : specialDataset,
                                         'STREAM' : stream } )
            bindsSpecialDataset = { 'STREAM' : stream,
                                    'PRIMDS' : specialDataset }
            bindsDatasetScenario.append( { 'RUN' : run,
                                           'PRIMDS' : specialDataset,
                                           'SCENARIO' : streamConfig.Express.Scenario } )

            if "DQM" in streamConfig.Express.DataTiers:
                outputModuleDetails.append( { 'dataTier' : "DQM",
                                              'eventContent' : "DQM",
                                              'primaryDataset' : specialDataset } )

            bindsStorageNode.append( { 'NODE' : expressPhEDExSubscribeNode } )

            bindsPhEDExConfig.append( { 'RUN' : run,
                                        'PRIMDS' : specialDataset,
                                        'NODE' : expressPhEDExSubscribeNode,
                                        'CUSTODIAL' : 1,
                                        'REQ_ONLY' : "n",
                                        'PRIO' : "high" } )

            subscriptions['Express'].append( { 'custodialSites' : [],
                                               'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                               'autoApproveSites' : [expressPhEDExSubscribeNode],
                                               'priority' : "high",
                                               'primaryDataset' : specialDataset } )

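            # ALCARECO output is only configured if alca skims are defined;
            # PromptCalibProd additionally needs a prompt calibration record
            # for this run and stream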
            alcaSkim = None
            if "ALCARECO" in streamConfig.Express.DataTiers:
                if len(streamConfig.Express.AlcaSkims) > 0:
                    outputModuleDetails.append( { 'dataTier' : "ALCARECO",
                                                  'eventContent' : "ALCARECO",
                                                  'primaryDataset' : specialDataset } )
                    alcaSkim = ",".join(streamConfig.Express.AlcaSkims)

                    if "PromptCalibProd" in streamConfig.Express.AlcaSkims:
                        bindsPromptCalibration = { 'RUN' : run,
                                                   'STREAM' : stream }

            dqmSeq = None
            if len(streamConfig.Express.DqmSequences) > 0:
                dqmSeq = ",".join(streamConfig.Express.DqmSequences)

            bindsExpressConfig = { 'RUN' : run,
                                   'STREAM' : stream,
                                   'PROC_VER' : streamConfig.Express.ProcessingVersion,
                                   'WRITE_TIERS' : ",".join(streamConfig.Express.DataTiers),
                                   'GLOBAL_TAG' : streamConfig.Express.GlobalTag,
                                   'MAX_EVENTS' : streamConfig.Express.MaxInputEvents,
                                   'MAX_SIZE' : streamConfig.Express.MaxInputSize,
                                   'MAX_FILES' : streamConfig.Express.MaxInputFiles,
                                   'MAX_LATENCY' : streamConfig.Express.MaxLatency,
                                   'ALCA_SKIM' : alcaSkim,
                                   'DQM_SEQ' : dqmSeq }

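        # apply a CMSSW version override for this stream if one is
        # configured for the online version the run was taken with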
        overrideVersion = streamConfig.VersionOverride.get(onlineVersion, None)
        if overrideVersion is not None:
            bindsCMSSWVersion.append( { 'VERSION' : overrideVersion } )
            bindsStreamOverride = { 'RUN' : run,
                                    'STREAM' : stream,
                                    'OVERRIDE' : overrideVersion }

        #
        # then configure datasets
        #
        getStreamDatasetTriggersDAO = daoFactory(classname = "RunConfig.GetStreamDatasetTriggers")
        datasetTriggers = getStreamDatasetTriggersDAO.execute(run, stream, transaction = False)

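        # loop over all primary datasets in the stream and build the
        # per-dataset output modules, trigger selections and PhEDEx settings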
        for dataset, paths in datasetTriggers.items():

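            # skip the "Unassigned path" dataset for a few special Express runs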
            if dataset == "Unassigned path":
                if stream == "Express" and run in [ 210114, 210116, 210120, 210121, 210178 ]:
                    continue

            datasetConfig = retrieveDatasetConfig(tier0Config, dataset)

            selectEvents = []
            for path in sorted(paths):
                selectEvents.append("%s:%s" % (path, runInfo['process']))

            if streamConfig.ProcessingStyle == "Bulk":

                promptRecoDelay[datasetConfig.Name] = datasetConfig.RecoDelay
                promptRecoDelayOffset[datasetConfig.Name] = datasetConfig.RecoDelayOffset

                outputModuleDetails.append( { 'dataTier' : "RAW",
                                              'eventContent' : "ALL",
                                              'selectEvents' : selectEvents,
                                              'primaryDataset' : dataset } )

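                # collect custodial/non-custodial targets and build the
                # PhEDEx configuration and subscription for this dataset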
                custodialSites = []
                nonCustodialSites = []
                autoApproveSites = []

                if datasetConfig.CustodialNode is not None:

                    custodialSites.append(datasetConfig.CustodialNode)

                    requestOnly = "y"
                    if datasetConfig.CustodialAutoApprove:
                        requestOnly = "n"
                        autoApproveSites.append(datasetConfig.CustodialNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.CustodialNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.CustodialNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : requestOnly,
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if datasetConfig.ArchivalNode is not None:

                    custodialSites.append(datasetConfig.ArchivalNode)
                    autoApproveSites.append(datasetConfig.ArchivalNode)

                    bindsStorageNode.append( { 'NODE' : datasetConfig.ArchivalNode } )

                    bindsPhEDExConfig.append( { 'RUN' : run,
                                                'PRIMDS' : dataset,
                                                'NODE' : datasetConfig.ArchivalNode,
                                                'CUSTODIAL' : 1,
                                                'REQ_ONLY' : "n",
                                                'PRIO' : datasetConfig.CustodialPriority } )

                if len(custodialSites) + len(nonCustodialSites) > 0:
                    subscriptions['Bulk'].append( { 'custodialSites' : custodialSites,
                                                    'nonCustodialSites' : nonCustodialSites,
                                                    'autoApproveSites' : autoApproveSites,
                                                    'priority' : datasetConfig.CustodialPriority,
                                                    'primaryDataset' : dataset } )

            elif streamConfig.ProcessingStyle == "Express":

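                # all configured data tiers except the stream-level ones
                # (ALCARECO, DQM) are written per primary dataset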
                for dataTier in streamConfig.Express.DataTiers:
                    if dataTier not in [ "ALCARECO", "DQM" ]:

                        outputModuleDetails.append( { 'dataTier' : dataTier,
                                                      'eventContent' : dataTier,
                                                      'selectEvents' : selectEvents,
                                                      'primaryDataset' : dataset } )

                bindsPhEDExConfig.append( { 'RUN' : run,
                                            'PRIMDS' : dataset,
                                            'NODE' : expressPhEDExSubscribeNode,
                                            'CUSTODIAL' : 1,
                                            'REQ_ONLY' : "n",
                                            'PRIO' : "high" } )

                subscriptions['Express'].append( { 'custodialSites' : [],
                                                   'nonCustodialSites' : [expressPhEDExSubscribeNode],
                                                   'autoApproveSites' : [expressPhEDExSubscribeNode],
                                                   'priority' : "high",
                                                   'primaryDataset' : dataset } )

        #
        # finally create WMSpec
        #
        outputs = {}
        if streamConfig.ProcessingStyle == "Bulk":
            taskName = "Repack"
            workflowName = "Repack_Run%d_Stream%s" % (run, stream)
            specArguments = getRepackArguments()
            specArguments['ProcessingVersion'] = streamConfig.Repack.ProcessingVersion
            specArguments['MaxSizeSingleLumi'] = streamConfig.Repack.MaxSizeSingleLumi
            specArguments['MaxSizeMultiLumi'] = streamConfig.Repack.MaxSizeMultiLumi
            specArguments['MinInputSize'] = streamConfig.Repack.MinInputSize
            specArguments['MaxInputSize'] = streamConfig.Repack.MaxInputSize
            specArguments['MaxEdmSize'] = streamConfig.Repack.MaxEdmSize
            specArguments['MaxOverSize'] = streamConfig.Repack.MaxOverSize
            specArguments['MaxInputEvents'] = streamConfig.Repack.MaxInputEvents
            specArguments['MaxInputFiles'] = streamConfig.Repack.MaxInputFiles
            specArguments['UnmergedLFNBase'] = "%s/t0temp/%s" % (runInfo['lfn_prefix'],
                                                                 runInfo['bulk_data_type'])
            specArguments['MergedLFNBase'] = "%s/%s" % (runInfo['lfn_prefix'],
                                                        runInfo['bulk_data_type'])
        elif streamConfig.ProcessingStyle == "Express":
            taskName = "Express"
            workflowName = "Express_Run%d_Stream%s" % (run, stream)
            specArguments = getExpressArguments()
            specArguments['ProcessingString'] = "Express"
            specArguments['ProcessingVersion'] = streamConfig.Express.ProcessingVersion
            specArguments['ProcScenario'] = streamConfig.Express.Scenario
            specArguments['GlobalTag'] = streamConfig.Express.GlobalTag
            specArguments['GlobalTagTransaction'] = "Express_%d" % run
            specArguments['MaxInputEvents'] = streamConfig.Express.MaxInputEvents
            specArguments['MaxInputSize'] = streamConfig.Express.MaxInputSize
            specArguments['MaxInputFiles'] = streamConfig.Express.MaxInputFiles
            specArguments['MaxLatency'] = streamConfig.Express.MaxLatency
            specArguments['AlcaSkims'] = streamConfig.Express.AlcaSkims
            specArguments['DqmSequences'] = streamConfig.Express.DqmSequences
            specArguments['UnmergedLFNBase'] = "%s/t0temp/express" % runInfo['lfn_prefix']
            specArguments['MergedLFNBase'] = "%s/express" % runInfo['lfn_prefix']
            specArguments['AlcaHarvestTimeout'] = runInfo['ah_timeout']
            specArguments['AlcaHarvestDir'] = runInfo['ah_dir']
            specArguments['DQMUploadProxy'] = dqmUploadProxy
            specArguments['DQMUploadUrl'] = runInfo['dqmuploadurl']
            specArguments['StreamName'] = stream

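        # arguments common to both repack and express workflows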
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            specArguments['RunNumber'] = run
            specArguments['AcquisitionEra'] = tier0Config.Global.AcquisitionEra
            specArguments['CMSSWVersion'] = streamConfig.VersionOverride.get(onlineVersion, onlineVersion)
            specArguments['Outputs'] = outputModuleDetails
            specArguments['OverrideCatalog'] = "trivialcatalog_file:/afs/cern.ch/cms/SITECONF/T0_CH_CERN/Tier0/override_catalog.xml?protocol=override"
            specArguments['ValidStatus'] = "VALID"

        if streamConfig.ProcessingStyle == "Bulk":
            wmSpec = repackWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(runInfo['bulk_data_loc'])
            for subscription in subscriptions['Bulk']:
                wmSpec.setSubscriptionInformation(**subscription)
        elif streamConfig.ProcessingStyle == "Express":
            wmSpec = expressWorkload(workflowName, specArguments)
            wmSpec.setPhEDExInjectionOverride(expressPhEDExInjectNode)
            for subscription in subscriptions['Express']:
                wmSpec.setSubscriptionInformation(**subscription)

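        # set owner details and very generous performance limits
        # (soft timeout of 7 days, 1 hour grace period)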
        if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
            wmSpec.setOwnerDetails("*****@*****.**", "T0",
                                   { 'vogroup': 'DEFAULT', 'vorole': 'DEFAULT',
                                     'dn' : "*****@*****.**" } )

            wmSpec.setupPerformanceMonitoring(maxRSS = 10485760, maxVSize = 10485760,
                                              softTimeout = 604800, gracePeriod = 3600)

            wmbsHelper = WMBSHelper(wmSpec, taskName, cachepath = specDirectory)

        filesetName = "Run%d_Stream%s" % (run, stream)
        fileset = Fileset(filesetName)

        #
        # create workflow (currently either repack or express)
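        # all inserts run in a single transaction, so a failure rolls
        # back the complete run/stream configuration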
        #
        try:
            myThread.transaction.begin()
            if len(bindsDataset) > 0:
                insertDatasetDAO.execute(bindsDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamDataset) > 0:
                insertStreamDatasetDAO.execute(bindsStreamDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsRepackConfig) > 0:
                insertRepackConfigDAO.execute(bindsRepackConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPromptCalibration) > 0:
                insertPromptCalibrationDAO.execute(bindsPromptCalibration, conn = myThread.transaction.conn, transaction = True)
            if len(bindsExpressConfig) > 0:
                insertExpressConfigDAO.execute(bindsExpressConfig, conn = myThread.transaction.conn, transaction = True)
            if len(bindsSpecialDataset) > 0:
                insertSpecialDatasetDAO.execute(bindsSpecialDataset, conn = myThread.transaction.conn, transaction = True)
            if len(bindsDatasetScenario) > 0:
                insertDatasetScenarioDAO.execute(bindsDatasetScenario, conn = myThread.transaction.conn, transaction = True)
            if len(bindsCMSSWVersion) > 0:
                insertCMSSWVersionDAO.execute(bindsCMSSWVersion, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStreamOverride) > 0:
                updateStreamOverrideDAO.execute(bindsStreamOverride, conn = myThread.transaction.conn, transaction = True)
            if len(bindsStorageNode) > 0:
                insertStorageNodeDAO.execute(bindsStorageNode, conn = myThread.transaction.conn, transaction = True)
            if len(bindsPhEDExConfig) > 0:
                insertPhEDExConfigDAO.execute(bindsPhEDExConfig, conn = myThread.transaction.conn, transaction = True)
            insertStreamStyleDAO.execute(bindsStreamStyle, conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle in [ 'Bulk', 'Express' ]:
                insertStreamFilesetDAO.execute(run, stream, filesetName, conn = myThread.transaction.conn, transaction = True)
                fileset.load()
                wmbsHelper.createSubscription(wmSpec.getTask(taskName), fileset, alternativeFilesetClose = True)
                insertWorkflowMonitoringDAO.execute([fileset.id],  conn = myThread.transaction.conn, transaction = True)
            if streamConfig.ProcessingStyle == "Bulk":
                bindsRecoReleaseConfig = []
                for fileset, primds in wmbsHelper.getMergeOutputMapping().items():
                    bindsRecoReleaseConfig.append( { 'RUN' : run,
                                                     'PRIMDS' : primds,
                                                     'FILESET' : fileset,
                                                     'RECODELAY' : promptRecoDelay[primds],
                                                     'RECODELAYOFFSET' : promptRecoDelayOffset[primds] } )
                insertRecoReleaseConfigDAO.execute(bindsRecoReleaseConfig, conn = myThread.transaction.conn, transaction = True)
            elif streamConfig.ProcessingStyle == "Express":
                markWorkflowsInjectedDAO.execute([workflowName], injected = True, conn = myThread.transaction.conn, transaction = True)
        except:
            myThread.transaction.rollback()
            raise
        else:
            myThread.transaction.commit()

    else:

        # should we do anything for local runs?
        pass
    return